PCA-HMM (hard regimes, MV per-label, "paper" windows)
#!/usr/bin/env python3
# Test 2/5 — PCA-HMM (cross-asset structure, hard regimes) — FAIR VERSION + SAFE TURNOVER PLOT
# -------------------------------------------------------------------------
# Your requests:
# - Keep original (non-causal) regime weights estimation (uses full-sample labels) ✅
# Kept fixes:
# - Constant-vol Risk-Parity baseline (IVP, rolling)
# - Risk-matched (target-vol) headline table @10% vol
# - Net-of-costs with turnover + cost grid (5/10/25/50 bps)
# - Subperiod panels (2000–09 / 2010–19 / 2020–25)
# - ES95 metric; data manifest
# - SAFE turnover distribution plot (histogram/spike fallback; no SciPy KDE)
# -------------------------------------------------------------------------
import os, glob, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# ---------------- Config ----------------
DATA_DIR = "./data2"  # input directory of per-ticker CSV price files
OUT_DIR = "./out1"  # all tables and figures are written here
K = 3  # number of HMM hidden states (regimes)
ROLL_PC_WIN = 63 # window for PCA feature (covariance)
ROLL_FIT_N = 500 # HMM fit window (days)
REFIT_EVERY = 5  # refit the HMM every N observations
RIDGE = 1e-6  # ridge added to covariance before (pseudo-)inversion
CLIP = 0.5  # per-asset absolute weight cap in mean-variance weights
SEED = 7  # RNG seed (numpy and hmmlearn)
# Reporting / fairness
TARGET_VOL = 0.10 # annualized target vol for matched-vol table
COST_BPS_GRID = [5, 10, 25, 50]  # transaction-cost levels (bps per unit turnover)
IVP_WIN = 63 # lookback for inverse-vol baseline
IVP_REBAL = 21 # rebalance every N trading days
# Hard dependency: exit early with an install hint if hmmlearn is missing.
try:
    from hmmlearn.hmm import GaussianHMM
except Exception as e:
    raise SystemExit("pip install hmmlearn\nOriginal error: %r" % e)
np.random.seed(SEED)  # reproducibility for any numpy draws
os.makedirs(OUT_DIR, exist_ok=True)  # ensure output directory exists
# --------------- Helpers ----------------
def load_data_from_dir(data_dir: str) -> pd.DataFrame:
    """Load per-ticker CSV price files from *data_dir* into one wide frame.

    Each ``*.csv`` is expected to carry a date column (``Date`` or ``date``)
    and a close-price column (``Close``/``Adj Close``/``close``/``adj_close``/
    ``AdjClose``, or the Polish ``Zamkniecie``). The ticker symbol is taken
    from the file name (upper-cased).

    Returns a DataFrame indexed by ``Date`` with one price column per ticker
    (outer-joined, so dates missing in some files yield NaN).

    Raises:
        FileNotFoundError: the directory contains no CSV files.
        ValueError: no file had a usable date + close-price pair.
    """
    files = sorted(glob.glob(os.path.join(data_dir, "*.csv")))
    if not files:
        raise FileNotFoundError(f"No CSVs found in {data_dir}")
    frames = []
    for f in files:
        tkr = os.path.splitext(os.path.basename(f))[0].upper()
        df = pd.read_csv(f)
        if "Date" not in df.columns and "date" in df.columns:
            df.rename(columns={"date": "Date"}, inplace=True)
        if "Date" not in df.columns:
            # Robustness fix: previously a file without any date column
            # raised an opaque KeyError; skip it like a missing Close.
            print(f"Skipping {tkr}: no Date column.")
            continue
        close_col = None
        for c in ["Close", "Adj Close", "close", "adj_close", "AdjClose"]:
            if c in df.columns:
                close_col = c; break
        if close_col is None and "Zamkniecie" in df.columns:
            close_col = "Zamkniecie"
        if close_col is None:
            print(f"Skipping {tkr}: no Close/Adj Close column.")
            continue
        df["Date"] = pd.to_datetime(df["Date"])
        df = df[["Date", close_col]].dropna()
        df.rename(columns={close_col: tkr}, inplace=True)
        frames.append(df)
    if not frames:
        raise ValueError("No usable files with Close prices.")
    # Outer-merge all tickers on Date so no observation is dropped here.
    out = frames[0]
    for df in frames[1:]:
        out = out.merge(df, on="Date", how="outer")
    out.sort_values("Date", inplace=True)
    out.set_index("Date", inplace=True)
    return out
def to_log_returns(prices: pd.DataFrame) -> pd.DataFrame:
    """Daily log-returns ln(P_t / P_{t-1}); the first row becomes NaN."""
    lagged = prices.shift(1)
    return np.log(prices / lagged)
def mean_var_weights(mu: np.ndarray, Sigma: np.ndarray, ridge=1e-6, clip=0.5):
    """Ridge-regularized mean-variance weights with unit gross exposure.

    Computes w ∝ (Sigma + ridge*I)^+ mu, scales to sum(|w|) == 1, clips
    each weight to [-clip, clip], then rescales to unit gross exposure.
    Falls back to equal weights whenever gross exposure is zero.
    """
    n = len(mu)
    regularized = Sigma + ridge * np.eye(n)
    raw = np.linalg.pinv(regularized).dot(mu.reshape(-1, 1)).ravel()

    def _unit_gross(v):
        # Normalize to unit L1 norm; equal weights if degenerate.
        gross = np.abs(v).sum()
        return v / gross if gross > 0 else np.ones(n) / n

    bounded = np.clip(_unit_gross(raw), -clip, clip)
    return _unit_gross(bounded)
def ann_kpis_from_returns(rets: pd.Series, freq=252):
    """Annualized KPIs for a (daily) return series.

    Returns a dict with annualized geometric return, annualized vol,
    Sharpe (NaN if vol is zero), max drawdown, and ES95 — the average
    loss at or beyond the 5% daily quantile, sign-flipped. Every value
    is NaN when the series is empty after dropping NaNs.
    """
    rets = rets.dropna()
    if rets.empty:
        return dict(ret=np.nan, vol=np.nan, sharpe=np.nan, maxdd=np.nan, es95=np.nan)
    wealth = (1 + rets).cumprod()
    worst_dd = (wealth / wealth.cummax() - 1).min()
    cagr = (1 + rets).prod() ** (freq / len(rets)) - 1
    vol_ann = rets.std() * np.sqrt(freq)
    sr = cagr / vol_ann if vol_ann > 0 else np.nan
    tail_cut = np.quantile(rets, 0.05)
    in_tail = rets <= tail_cut
    shortfall = -rets[in_tail].mean() if in_tail.any() else np.nan
    return dict(ret=cagr, vol=vol_ann, sharpe=sr, maxdd=worst_dd, es95=shortfall)
def pca_pc1_var_feature(R: pd.DataFrame, win=63):
    """Rolling log-share of variance explained by the first principal component.

    Returns:
      - a Series of log(PC1 variance share), indexed by each window's
        *end* date (windows that lose rows to NaN-dropping are skipped);
      - a ``(columns, loadings)`` pair with the unit-length PC1
        eigenvector of the last complete window (for plotting).
    """
    from numpy.linalg import eigh
    shares, stamps = [], []
    pc1_vec = None
    pc1_cols = R.columns
    for end in range(win - 1, len(R)):
        window = R.iloc[end - win + 1:end + 1].dropna(how="any")
        if len(window) < win:
            continue
        evals, evecs = eigh(window.cov().values)  # eigenvalues ascending
        top = float(evals[-1])
        denom = float(evals.sum()) + 1e-12  # guard against zero total variance
        shares.append(np.log(top / denom + 1e-12))
        stamps.append(R.index[end])
        pc1_vec = evecs[:, -1]  # eigenvector of the largest eigenvalue
        pc1_cols = window.columns
    series = pd.Series(shares, index=stamps, name="x_pc1_share_log")
    return series, (pc1_cols, pc1_vec)
def smooth_drawdown_from_returns(rets: pd.Series):
    """Return (wealth curve, drawdown path); NaN returns count as flat days."""
    growth = 1.0 + rets.fillna(0)
    curve = growth.cumprod()
    peak = curve.cummax()
    return curve, curve / peak - 1.0
def inverse_vol_weights(R_window: pd.DataFrame):
    """Inverse-volatility weights over a return window (ndarray, sums to 1).

    Assets with zero or undefined volatility receive weight 0; if every
    asset is degenerate, falls back to equal weights.
    """
    sigma = R_window.std().replace(0, np.nan)
    raw = (1.0 / sigma).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    total = raw.sum()
    if total > 0:
        return (raw / total).values
    n_assets = len(R_window.columns)
    fallback = pd.Series(np.ones(n_assets) / n_assets, index=R_window.columns)
    return fallback.values
def turnover_series(W: pd.DataFrame):
    """One-way turnover per day: 0.5 * sum(|w_t - w_{t-1}|).

    Missing weights are forward-filled (then zero-filled) before
    differencing; the first day is defined to have zero turnover.
    """
    # Fix: `.ffill()` replaces the deprecated `fillna(method="ffill")`.
    Wf = W.ffill().fillna(0.0)
    dW = (Wf - Wf.shift(1)).abs().sum(axis=1) * 0.5
    if len(dW):  # guard: the original raised IndexError on an empty frame
        dW.iloc[0] = 0.0
    return dW
def apply_costs(gross: pd.Series, turnover: pd.Series, cost_bps: float):
    """Net returns after linear costs of *cost_bps* per unit of turnover."""
    per_unit_cost = cost_bps / 10000.0
    return gross - turnover * per_unit_cost
def risk_match(rets: pd.Series, target_vol=0.10, freq=252):
    """Scale returns so annualized vol equals *target_vol*.

    Returns (scaled series, alpha). Degenerate realized vol (zero, NaN,
    or infinite) leaves the series untouched with alpha = 1.0.
    """
    realized = rets.std() * np.sqrt(freq)
    if not (np.isfinite(realized) and realized > 0):
        return rets.copy(), 1.0
    scale = target_vol / realized
    return rets * scale, scale
# --- ORIGINAL (non-causal) regime weights estimation -------------------
def regime_weights_from_labels(R: pd.DataFrame, labels: pd.Series, ridge=1e-6, clip=0.5):
    """Per-regime mean-variance weights from full-sample labels (non-causal).

    For each label value, fits MV weights on the return rows carrying
    that label. Regimes with fewer than max(60, 5*N) observations fall
    back to equal weights. Returns {int_label: weight ndarray}.
    """
    N = R.shape[1]
    min_obs = max(60, N * 5)
    weights = {}
    for lab in sorted(labels.dropna().unique()):
        dates_k = labels.index[labels == lab]
        if len(dates_k) < min_obs:
            weights[int(lab)] = np.ones(N) / N
            continue
        sub = R.loc[dates_k]
        weights[int(lab)] = mean_var_weights(
            sub.mean().values, sub.cov().values, ridge=ridge, clip=clip
        )
    return weights
# ---------------- Load & prep ----------------
# Forward-fill price gaps, then keep only dates where every asset has a
# price so the rolling cross-asset covariance windows are complete.
prices = load_data_from_dir(DATA_DIR).ffill().dropna(how="all")
tickers = [c for c in prices.columns if prices[c].notna().sum()>0]
prices = prices[tickers].dropna()
R = to_log_returns(prices).dropna()
N = R.shape[1]
print(f"Loaded {N} assets: {', '.join(R.columns)}")
# Data manifest (repro)
manifest = pd.DataFrame({
    "Ticker": R.columns,
    "FirstDate": [R.index.min()]*N,
    "LastDate": [R.index.max()]*N,
    "Obs": [R[c].dropna().shape[0] for c in R.columns]
})
manifest.to_csv(os.path.join(OUT_DIR, "manifest_pca_hmm.csv"), index=False)
# Feature: log(PC1 variance share)
x_pc1, (cols_last, loadings_last) = pca_pc1_var_feature(R, win=ROLL_PC_WIN)
x_df = x_pc1.to_frame()
# Restrict returns to the dates where the PCA feature exists.
R_aligned = R.loc[x_df.index].copy()
dates = x_df.index
# ---------------- Rolling HMM (labels) ----------------
# NOTE(review): constructing an int-dtype Series with no data may raise on
# recent pandas versions — verify against the pinned pandas.
labels = pd.Series(index=dates, dtype=int)
trans_mats = {}  # transition-matrix snapshot keyed by refit date
model = None
last_fit = None
for i, dt in enumerate(dates):
    # Refit the HMM every REFIT_EVERY observations on a trailing window.
    refit = (last_fit is None) or ((i - last_fit) >= REFIT_EVERY)
    if refit:
        start_idx = max(0, i - ROLL_FIT_N)
        fit_slice = x_df.iloc[start_idx:i+1].values
        # Warm-up: default to state 0 until enough history accumulates.
        if len(fit_slice) < max(120, ROLL_FIT_N//4):
            labels.iloc[i] = 0
            continue
        model = GaussianHMM(n_components=K, covariance_type='diag', n_iter=200, random_state=SEED)
        model.fit(fit_slice)
        last_fit = i
        trans_mats[dt] = model.transmat_.copy()
    # Decode the trailing window and keep only today's (last) state.
    start_idx = max(0, i - ROLL_FIT_N)
    window = x_df.iloc[start_idx:i+1].values
    path = model.predict(window)
    labels.iloc[i] = path[-1]
# Remap states calm→mid→stress by feature mean
# hmmlearn state indices are arbitrary, so states are reordered by the
# mean of the PC1-share feature within each state.
means = [(k, x_df.loc[labels==k, "x_pc1_share_log"].mean()) for k in range(K)]
order = [k for (k, _) in sorted(means, key=lambda z: z[1])]
remap = {old:new for new, old in enumerate(order)}
labels = labels.map(remap)
# ---------------- Regime weights (NON-CAUSAL, original) ---------------
# Deliberately uses full-sample labels (look-ahead), per the header note.
regime_w = regime_weights_from_labels(R_aligned, labels, ridge=RIDGE, clip=CLIP)
# ---------------- Strategy (hard regimes) ------------------------------
W_dyn = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
prev_label = None
for dt in R_aligned.index:
    # Trade on the previous day's label (one-day information lag);
    # hold equal weights until the first label is observed.
    if prev_label is None:
        W_dyn.loc[dt] = np.ones(N)/N
    else:
        w = regime_w.get(int(prev_label), np.ones(N)/N)
        W_dyn.loc[dt] = w
    prev_label = labels.loc[dt] if pd.notna(labels.loc[dt]) else prev_label
# Shift weights one more day so returns accrue on positions held overnight.
dyn_rets = (R_aligned * W_dyn.shift(1)).sum(axis=1).fillna(0.0)
dyn_turn = turnover_series(W_dyn)
# ---------------- Baselines -------------------------------------------
# Static Mean-Var
# Fitted on the full sample (in-sample baseline by design).
Sigma_full = R_aligned.cov().values
mu_full = R_aligned.mean().values
w_static = mean_var_weights(mu_full, Sigma_full, ridge=RIDGE, clip=CLIP)
W_static = pd.DataFrame([w_static]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
static_rets = (R_aligned @ pd.Series(w_static, index=R_aligned.columns)).fillna(0.0)
static_turn = turnover_series(W_static)
# Equal-Weight
ew_w = np.ones(N)/N
W_ew = pd.DataFrame([ew_w]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
ew_rets = (R_aligned @ pd.Series(ew_w, index=R_aligned.columns)).fillna(0.0)
ew_turn = turnover_series(W_ew)
# Constant-Vol Risk-Parity (IVP), rolling
W_ivp = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for i, dt in enumerate(R_aligned.index):
    if i < IVP_WIN:
        # Warm-up: equal weights until a full lookback window exists.
        W_ivp.iloc[i] = np.ones(N)/N
    else:
        # Re-estimate inverse-vol weights every IVP_REBAL days (and on the
        # first post-warm-up day); otherwise carry yesterday's weights.
        if i % IVP_REBAL == 0 or i == IVP_WIN:
            Rw = R_aligned.iloc[i-IVP_WIN:i]
            w = inverse_vol_weights(Rw)
            W_ivp.iloc[i] = w
        else:
            W_ivp.iloc[i] = W_ivp.iloc[i-1]
# Fix: `.ffill()` replaces the deprecated `fillna(method="ffill")`.
W_ivp = W_ivp.ffill().fillna(1.0/N)
ivp_rets = (R_aligned * W_ivp.shift(1)).sum(axis=1).fillna(0.0)
ivp_turn = turnover_series(W_ivp)
# ---------------- KPIs: native (pre-cost) -----------------------------
def kpi_row(name, rets):
    """Flatten ann_kpis_from_returns output into a table row."""
    stats = ann_kpis_from_returns(rets)
    return [name] + [stats[key] for key in ('ret', 'vol', 'sharpe', 'maxdd', 'es95')]
# Pre-cost KPI table at each strategy's native (unscaled) volatility.
kpi_native = pd.DataFrame(
    [kpi_row('Equal-Weight', ew_rets),
     kpi_row('Static Mean-Var', static_rets),
     kpi_row('IVP (Const-Vol RP, rolling)', ivp_rets),
     kpi_row('PCA-HMM Hard-Regime', dyn_rets)],
    columns=['Strategy','Ann.Return','Ann.Vol','Sharpe','MaxDD','ES95_day']
)
kpi_native_path = os.path.join(OUT_DIR, "kpi_pca_native.csv")
kpi_native.to_csv(kpi_native_path, index=False)
print("\n=== KPI (native, pre-cost) ===")
print(kpi_native.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
print(f"Saved → {kpi_native_path}")
# ---------------- Risk-matched (target vol) headline table ------------
def matched_table(name, rets, turn):
    """Risk-match one strategy to TARGET_VOL.

    Returns (name, scaled returns, scaled turnover, alpha, KPI dict);
    turnover is scaled by the same alpha as the returns.
    """
    scaled, alpha = risk_match(rets, TARGET_VOL)
    kpis = ann_kpis_from_returns(scaled)
    return name, scaled, turn * alpha, alpha, kpis
# Scale every strategy to TARGET_VOL and collect the matched series,
# scaled turnover, and scale factors for the headline table.
rows = []
matched_series = {}
turn_series = {}
alphas = {}
for name, rets, turn in [
    ('Equal-Weight', ew_rets, ew_turn),
    ('Static Mean-Var', static_rets, static_turn),
    ('IVP (Const-Vol RP, rolling)', ivp_rets, ivp_turn),
    ('PCA-HMM Hard-Regime', dyn_rets, dyn_turn),
]:
    nm, rm_rets, rm_turn, alpha, row = matched_table(name, rets, turn)
    matched_series[nm] = rm_rets
    turn_series[nm] = rm_turn
    alphas[nm] = alpha
    rows.append([nm, row['ret'], row['vol'], row['sharpe'], row['maxdd'], row['es95'], alpha])
kpi_matched = pd.DataFrame(rows, columns=['Strategy','Ann.Return','Ann.Vol','Sharpe','MaxDD','ES95_day','Alpha(scale)'])
kpi_matched_path = os.path.join(OUT_DIR, "kpi_pca_matchedVol.csv")
kpi_matched.to_csv(kpi_matched_path, index=False)
print("\n=== KPI (risk-matched @ {:.0f}%) ===".format(TARGET_VOL*100))
print(kpi_matched.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
print(f"Saved → {kpi_matched_path}")
# ---------------- Net-of-costs grids (on risk-matched) -----------------
def cost_grid_table(cost_bps_list):
    """KPIs for every (cost level, strategy) pair on the risk-matched series.

    Reads the module-level `matched_series` and `turn_series` maps.
    """
    rows = []
    for bps in cost_bps_list:
        for strat in matched_series:
            net = apply_costs(matched_series[strat], turn_series[strat], bps)
            stats = ann_kpis_from_returns(net)
            rows.append([bps, strat, stats['ret'], stats['vol'],
                         stats['sharpe'], stats['maxdd'], stats['es95']])
    return pd.DataFrame(rows, columns=['Cost_bps', 'Strategy', 'Ann.Return',
                                       'Ann.Vol', 'Sharpe', 'MaxDD', 'ES95_day'])
# Net-of-cost KPIs across the bps grid (computed on risk-matched series).
kpi_costs = cost_grid_table(COST_BPS_GRID)
kpi_costs_path = os.path.join(OUT_DIR, "kpi_pca_matchedVol_costGrid.csv")
kpi_costs.to_csv(kpi_costs_path, index=False)
print(f"\nSaved net-of-costs grid → {kpi_costs_path}")
# ---------------- Turnover stats table ---------------------------------
# Summary statistics of daily one-way turnover per strategy.
turn_stats = []
for name, ts in turn_series.items():
    turn_stats.append([name, ts.mean(), ts.median(), ts.quantile(0.9), ts.max()])
turn_df = pd.DataFrame(turn_stats, columns=['Strategy','Turnover_mean','Turnover_median','Turnover_p90','Turnover_max'])
turn_df_path = os.path.join(OUT_DIR, "turnover_stats.csv")
turn_df.to_csv(turn_df_path, index=False)
print(f"Saved turnover stats → {turn_df_path}")
# ---------------- Subperiod panels (risk-matched, 3 eras) -------------
def subperiod_slices(idx):
    """Boolean masks over *idx* for the three reporting eras.

    Eras: 2000–09, 2010–19, and 2020 through the last date in *idx*.
    Returns (start, end, mask) triples; eras with no dates are dropped.
    """
    final_day = str(idx.max().date())
    eras = [("2000-01-01", "2009-12-31"),
            ("2010-01-01", "2019-12-31"),
            ("2020-01-01", final_day)]
    slices = []
    for start, end in eras:
        mask = (idx >= pd.Timestamp(start)) & (idx <= pd.Timestamp(end))
        if mask.any():
            slices.append((start, end, mask))
    return slices
# Era-by-era KPIs on the risk-matched series.
subs_rows = []
for (s,e,mask) in subperiod_slices(R_aligned.index):
    for name in matched_series:
        rets = matched_series[name].loc[mask]
        k = ann_kpis_from_returns(rets)
        subs_rows.append([f"{s}–{e}", name, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']])
sub_df = pd.DataFrame(subs_rows, columns=['Period','Strategy','Ann.Return','Ann.Vol','Sharpe','MaxDD','ES95_day'])
sub_df_path = os.path.join(OUT_DIR, "kpi_pca_matchedVol_subperiods.csv")
sub_df.to_csv(sub_df_path, index=False)
print(f"Saved subperiod KPIs → {sub_df_path}")
# ---------------- Figures -------------------
# SAFE helper: turnover distribution without SciPy KDE
def plot_turnover_distribution(turn_map, out_path):
    """Plot per-strategy turnover distributions without SciPy KDE.

    Degenerate series (empty, near-constant, or fewer than 3 distinct
    values) are drawn as a dashed vertical "spike"; otherwise a density
    histogram is drawn as a line through the bin centers.
    """
    fig, ax = plt.subplots(figsize=(10,4))
    for name, ts in turn_map.items():
        s = pd.Series(ts).replace([np.inf, -np.inf], np.nan).dropna()
        s = s[np.isfinite(s)]
        if (len(s) == 0) or (s.std() <= 1e-12) or (s.nunique() < 3):
            val = float(s.iloc[0]) if len(s) else 0.0
            ax.axvline(val, linestyle="--", linewidth=1.5, label=f"{name} (spike)")
        else:
            counts, bins = np.histogram(s, bins=40, density=True)
            centers = 0.5*(bins[1:] + bins[:-1])
            ax.plot(centers, counts, linewidth=1.5, label=name)
    ax.set_title("Turnover distribution (one-way, risk-matched)")
    ax.grid(True); ax.legend()
    fig.tight_layout(); fig.savefig(out_path, dpi=160)
    plt.close(fig)  # fix: release the figure — the script opens many figures
# 0) Turnover distribution (risk-matched scaled)
plot_turnover_distribution(turn_series, os.path.join(OUT_DIR, "fig_turnover_density.png"))
# 1) PC1 variance share (level, not log)
pc1_share_level = np.exp(x_pc1) # back from log
plt.figure(figsize=(12,3.2))
plt.plot(pc1_share_level.index, pc1_share_level.values)
plt.title("PC1 Variance Share (rolling covariance)")
plt.grid(True); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "fig_pc1_share.png"), dpi=160)
# 2) EW price w/ regime ribbon
ew_price_curve = (1 + (R_aligned.mean(axis=1).fillna(0.0))).cumprod()
fig, ax = plt.subplots(figsize=(12,4))
ax.plot(ew_price_curve.index, ew_price_curve.values, linewidth=1.2)
ax.set_title("EW Price with Regime Ribbon (PCA-HMM, Hard)")
ax.grid(True)
# Fix: `.ffill()` replaces the deprecated `fillna(method='ffill')`.
reg_series = labels.reindex(ew_price_curve.index).ffill()
last = None
# Shade a span each time the regime label changes.
for i, dt in enumerate(ew_price_curve.index):
    r = int(reg_series.iloc[i]) if not pd.isna(reg_series.iloc[i]) else None
    if i==0:
        last = (dt, r)
    elif r != last[1]:
        ax.axvspan(last[0], ew_price_curve.index[i-1], alpha=0.10)
        last = (dt, r)
ax.axvspan(last[0], ew_price_curve.index[-1], alpha=0.10)
fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR, "fig_regimes_ribbon.png"), dpi=160)
# 3) Transition matrix heatmap (last fit)
if len(trans_mats):
    last_A = list(trans_mats.values())[-1]  # most recent refit snapshot
    plt.figure(figsize=(4,3))
    plt.imshow(last_A, aspect='auto')
    plt.colorbar()
    plt.title("Transition Matrix (last fit)")
    plt.xticks(range(K), [f"{i}" for i in range(K)])
    plt.yticks(range(K), [f"{i}" for i in range(K)])
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, "fig_transmat_heat.png"), dpi=160)
# 4) Regime MV weights (bar) — display (last window)
# Display-only weights refit on the trailing ROLL_FIT_N days, using the
# same minimum-observation fallback rule as regime_weights_from_labels.
plt.figure(figsize=(10,3.2))
disp_reg_w = {}
Lw = labels.iloc[-ROLL_FIT_N:]
Rw = R_aligned.iloc[-ROLL_FIT_N:]
for k in range(K):
    idx_k = Lw.index[Lw==k]
    if len(idx_k) < max(60, N*5):
        disp_reg_w[k] = np.ones(N)/N
    else:
        mu = Rw.loc[idx_k].mean().values
        Sigma = Rw.loc[idx_k].cov().values
        disp_reg_w[k] = mean_var_weights(mu, Sigma, ridge=RIDGE, clip=CLIP)
bar_data = np.array([disp_reg_w.get(k, np.ones(N)/N) for k in range(K)])
xpos = np.arange(N); width = 0.8 / K  # grouped bars: one group per asset
for k in range(K):
    plt.bar(xpos + k*width, bar_data[k], width=width, label=f"Regime {k}")
plt.xticks(xpos + width*(K-1)/2, R_aligned.columns, rotation=0)
plt.title("Mean–Variance Weights by Regime (display, last window)")
plt.legend(ncol=min(3,K)); plt.grid(True, axis='y'); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "fig_regime_weights_bar.png"), dpi=160)
# 5) PC1 loadings (last window)
if loadings_last is not None:
    plt.figure(figsize=(10,3.2))
    vals = np.abs(loadings_last)  # magnitudes only; eigenvector sign is arbitrary
    xpos = np.arange(len(cols_last))
    plt.bar(xpos, vals)
    plt.xticks(xpos, cols_last, rotation=0)
    plt.title("PC1 Loadings (last rolling window)")
    plt.grid(True, axis='y')
    plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, "fig_pca_loadings_last.png"), dpi=160)
# 6) Log cumulative wealth (risk-matched, pre-cost)
plt.figure(figsize=(10,4))
for name, rets in matched_series.items():
    curve = (1+rets).cumprod()  # wealth curve; plotted in log space below
    plt.plot(curve.index, np.log(curve), label=name)
plt.title("Log Cumulative Wealth (risk-matched, pre-cost)")
plt.legend(); plt.grid(True); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "fig_cum_wealth_matched.png"), dpi=160)
# 7) Drawdowns (risk-matched, pre-cost)
def ddplot_from_rets(name, rets):
curve, dd = smooth_drawdown_from_returns(rets)
plt.figure(figsize=(10,3))
plt.plot(dd.index, dd.values)
plt.title(f"Drawdown (risk-matched): {name}")
plt.grid(True); plt.tight_layout()
f = f"fig_drawdown_matched_{name.replace(' ','_').replace('-','').lower()}.png"
plt.savefig(os.path.join(OUT_DIR, f), dpi=160)
for name, rets in matched_series.items():
ddplot_from_rets(name, rets)
print(f"\nAll outputs saved in: {os.path.abspath(OUT_DIR)}")
Loaded 13 assets: ACWI, BIL, GLD, GOVT, IEF, LQD, QQQ, SHV, SPY, TIP, TLT, URTH, VTI
Model is not converging. Current: 412.05708234156646 is not greater than 412.25667257937704. Delta is -0.1995902378105825 Model is not converging. Current: 402.7170148008754 is not greater than 402.72015698867034. Delta is -0.0031421877949355803 Model is not converging. Current: 548.3737935063608 is not greater than 548.37978827715. Delta is -0.0059947707892433755 Model is not converging. Current: 458.04369356984495 is not greater than 458.04467758021275. Delta is -0.0009840103678016021 Model is not converging. Current: 464.83491798200885 is not greater than 464.84100816510386. Delta is -0.0060901830950115254 Model is not converging. Current: 469.7994319010304 is not greater than 469.80392097771005. Delta is -0.004489076679647042 Model is not converging. Current: 474.4007384413255 is not greater than 474.403379971688. Delta is -0.0026415303625526576 Model is not converging. Current: 479.0086608555064 is not greater than 479.0094556877677. Delta is -0.0007948322613060554 Model is not converging. Current: 493.47385179509746 is not greater than 493.474682429957. Delta is -0.0008306348595397139 Model is not converging. Current: 498.94655364230124 is not greater than 498.94681609077185. Delta is -0.00026244847060752363 Model is not converging. Current: 554.5222344478406 is not greater than 554.5231419669689. Delta is -0.0009075191283045569 Model is not converging. Current: 648.6166496695652 is not greater than 648.6230340005968. Delta is -0.006384331031654256 Model is not converging. Current: 650.6273258593816 is not greater than 650.6339801327547. Delta is -0.006654273373101205 Model is not converging. Current: 601.9490902850579 is not greater than 601.9548589034861. Delta is -0.005768618428191985 Model is not converging. Current: 613.985342592658 is not greater than 613.9919069074178. Delta is -0.006564314759884837 Model is not converging. Current: 613.7151832723503 is not greater than 613.7166058711778. Delta is -0.0014225988275029522 Model is not converging. 
Current: 618.8342482801512 is not greater than 618.8343369381384. Delta is -8.865798724855267e-05 Model is not converging. Current: 576.2978718078684 is not greater than 576.3065545490861. Delta is -0.008682741217626244 Model is not converging. Current: 563.0572157864834 is not greater than 563.0594077707646. Delta is -0.0021919842811257695 Model is not converging. Current: 656.1601844149233 is not greater than 656.1717713153429. Delta is -0.011586900419615631 Model is not converging. Current: 654.4719437823305 is not greater than 654.4723628116546. Delta is -0.00041902932412085647 Model is not converging. Current: 689.4799488728324 is not greater than 689.4996453698827. Delta is -0.019696497050290418 Model is not converging. Current: 674.3351453065084 is not greater than 674.3443046440252. Delta is -0.009159337516848609 Model is not converging. Current: 674.1392438012732 is not greater than 674.1427291401491. Delta is -0.0034853388758619985 Model is not converging. Current: 708.9170892738368 is not greater than 708.9331318065316. Delta is -0.016042532694768852 Model is not converging. Current: 680.8170834978882 is not greater than 680.8191496572202. Delta is -0.002066159332002826 Model is not converging. Current: 704.4710470524119 is not greater than 704.4773219052396. Delta is -0.0062748528276870275 Model is not converging. Current: 708.5815385020234 is not greater than 708.5906692437209. Delta is -0.009130741697504163 Model is not converging. Current: 714.7325666648908 is not greater than 714.7349968885936. Delta is -0.002430223702845069 Model is not converging. Current: 616.2086402925729 is not greater than 616.2150636334702. Delta is -0.006423340897299568 Model is not converging. Current: 648.9979551877134 is not greater than 648.9989369203738. Delta is -0.0009817326604206755 Model is not converging. Current: 807.6300509384655 is not greater than 807.6384437711062. Delta is -0.00839283264065216 Model is not converging. 
Current: 808.9197032423106 is not greater than 808.9254432464697. Delta is -0.00574000415917908 Model is not converging. Current: 817.7482139748148 is not greater than 817.7488497110867. Delta is -0.0006357362718745208 Model is not converging. Current: 842.1303118904967 is not greater than 842.1315200774062. Delta is -0.0012081869094799913 Model is not converging. Current: 855.1886766025202 is not greater than 855.2047389166711. Delta is -0.016062314150872226 Model is not converging. Current: 853.6677937560358 is not greater than 853.68758976003. Delta is -0.019796003994201783 Model is not converging. Current: 843.0689164515139 is not greater than 843.0719885288169. Delta is -0.003072077302931575 Model is not converging. Current: 843.8445164006804 is not greater than 843.845370168209. Delta is -0.0008537675286106605 Model is not converging. Current: 852.7703008049232 is not greater than 852.7743746773743. Delta is -0.004073872451158422 Model is not converging. Current: 872.2837907737804 is not greater than 872.2896560901702. Delta is -0.005865316389758846 Model is not converging. Current: 878.6129090009238 is not greater than 878.6296856306952. Delta is -0.016776629771470652 Model is not converging. Current: 910.8490765677682 is not greater than 910.8587981314727. Delta is -0.00972156370448829 Model is not converging. Current: 920.0616191782657 is not greater than 920.063827954814. Delta is -0.0022087765482865507 Model is not converging. Current: 919.095514233573 is not greater than 919.1398364074405. Delta is -0.04432217386749926 Model is not converging. Current: 917.6305829876269 is not greater than 917.688896354362. Delta is -0.05831336673509213 Model is not converging. Current: 913.5313810322899 is not greater than 913.5793731806768. Delta is -0.04799214838692478 Model is not converging. Current: 914.6570669094224 is not greater than 914.6835833063179. Delta is -0.026516396895544858 Model is not converging. 
Current: 907.2671241149268 is not greater than 907.2994594092937. Delta is -0.03233529436693061 Model is not converging. Current: 910.8472404058243 is not greater than 910.8726385659303. Delta is -0.025398160105964962 Model is not converging. Current: 914.7052019674232 is not greater than 914.7750731462587. Delta is -0.0698711788354558 Model is not converging. Current: 864.9538674984052 is not greater than 864.9767809476525. Delta is -0.0229134492473122 Model is not converging. Current: 741.3025152468533 is not greater than 741.311650526179. Delta is -0.009135279325732881 Model is not converging. Current: 747.0330781434175 is not greater than 747.0684088259574. Delta is -0.03533068253989313 Model is not converging. Current: 747.5537055235167 is not greater than 747.555151791229. Delta is -0.001446267712367444 Model is not converging. Current: 767.1673054551487 is not greater than 767.181068173419. Delta is -0.013762718270299956 Model is not converging. Current: 772.3067865746947 is not greater than 772.3132942798785. Delta is -0.006507705183821599 Model is not converging. Current: 783.8850194086682 is not greater than 783.927797921195. Delta is -0.04277851252675191 Model is not converging. Current: 793.266189928237 is not greater than 793.290970168251. Delta is -0.02478024001391077 Model is not converging. Current: 802.3028458280426 is not greater than 802.3146546958699. Delta is -0.011808867827312497 Model is not converging. Current: 807.1436876705543 is not greater than 807.1591765394735. Delta is -0.0154888689191921 Model is not converging. Current: 810.5752511766023 is not greater than 810.5813260425551. Delta is -0.0060748659528826465 Model is not converging. Current: 821.7273395478358 is not greater than 821.7307527961469. Delta is -0.0034132483111761758 Model is not converging. Current: 827.9244985074381 is not greater than 827.9528469281752. Delta is -0.028348420737074775 Model is not converging. 
Current: 835.8103198323417 is not greater than 835.8585923483879. Delta is -0.048272516046154124 Model is not converging. Current: 847.4534753481902 is not greater than 847.5026036515641. Delta is -0.04912830337389096 Model is not converging. Current: 853.4859531129838 is not greater than 853.5392008449256. Delta is -0.05324773194183763 Model is not converging. Current: 859.2298101041796 is not greater than 859.2347896297322. Delta is -0.0049795255525850735 Model is not converging. Current: 886.5173713872819 is not greater than 886.5177155472784. Delta is -0.0003441599965299247 Model is not converging. Current: 889.0659516098925 is not greater than 889.067108567774. Delta is -0.0011569578814487613 Model is not converging. Current: 891.8801274777119 is not greater than 891.8849322486119. Delta is -0.004804770900022959 Model is not converging. Current: 888.896785396754 is not greater than 888.9021897602139. Delta is -0.005404363459888373 Model is not converging. Current: 887.9299939293911 is not greater than 887.9347266108393. Delta is -0.0047326814482175905 Model is not converging. Current: 867.5176948424016 is not greater than 867.5225249923326. Delta is -0.004830149930967309 Model is not converging. Current: 872.3943207985001 is not greater than 872.4004505907086. Delta is -0.0061297922085259415 Model is not converging. Current: 872.904772488802 is not greater than 872.910368388421. Delta is -0.005595899619038391 Model is not converging. Current: 874.5897691526415 is not greater than 874.5956475562793. Delta is -0.005878403637893825 Model is not converging. Current: 900.0019490116481 is not greater than 900.004941824777. Delta is -0.0029928131289125304 Model is not converging. Current: 910.1375998882303 is not greater than 910.1472405844145. Delta is -0.009640696184192166 Model is not converging. Current: 908.8200472438695 is not greater than 908.8253864291545. Delta is -0.005339185285038184 Model is not converging. 
Current: 906.0096393890362 is not greater than 906.0175465931312. Delta is -0.007907204094976805 Model is not converging. Current: 905.2579314093196 is not greater than 905.2627419622497. Delta is -0.004810552930166523 Model is not converging. Current: 945.5992188371682 is not greater than 945.600751273584. Delta is -0.0015324364158004755 Model is not converging. Current: 945.6710080866096 is not greater than 945.6985849884111. Delta is -0.027576901801467102 Model is not converging. Current: 946.8299241010748 is not greater than 946.8439878446072. Delta is -0.014063743532460649 Model is not converging. Current: 937.5310918771877 is not greater than 937.5443632747306. Delta is -0.013271397542894192 Model is not converging. Current: 931.0212196207847 is not greater than 931.0416781259825. Delta is -0.02045850519778014 Model is not converging. Current: 925.8114553463162 is not greater than 925.8289009561839. Delta is -0.01744560986776378 Model is not converging. Current: 925.5098305801708 is not greater than 925.5107867944532. Delta is -0.0009562142823824615 Model is not converging. Current: 923.0202503962339 is not greater than 923.0267896254225. Delta is -0.006539229188547324 Model is not converging. Current: 919.8072935804615 is not greater than 919.8166666734962. Delta is -0.009373093034696467 Model is not converging. Current: 926.4550415142594 is not greater than 926.4618845082546. Delta is -0.0068429939951784036 Model is not converging. Current: 935.446211722793 is not greater than 935.4660563738701. Delta is -0.019844651077050912 Model is not converging. Current: 933.8331962047333 is not greater than 933.8516910556814. Delta is -0.01849485094805914 Model is not converging. Current: 931.8850770976186 is not greater than 931.9021875736622. Delta is -0.017110476043626477 Model is not converging. Current: 928.2211334548526 is not greater than 928.2345594299329. Delta is -0.013425975080281205 Model is not converging. 
Current: 928.1656241651715 is not greater than 928.1772466963597. Delta is -0.011622531188208995 Model is not converging. Current: 926.2519906966522 is not greater than 926.2555130090212. Delta is -0.0035223123690002467 Model is not converging. Current: 925.8051061991202 is not greater than 925.8119030426603. Delta is -0.0067968435400871385 Model is not converging. Current: 932.6100126693772 is not greater than 932.6295007565639. Delta is -0.019488087186687153 Model is not converging. Current: 936.2844897132971 is not greater than 936.3065868385582. Delta is -0.022097125261097972 Model is not converging. Current: 942.8088491847075 is not greater than 942.8305681788586. Delta is -0.021718994151115112 Model is not converging. Current: 937.5861367518366 is not greater than 937.6030378662005. Delta is -0.01690111436380448 Model is not converging. Current: 935.2943612574704 is not greater than 935.3065056812749. Delta is -0.01214442380444325 Model is not converging. Current: 928.4146546688941 is not greater than 928.4208806932622. Delta is -0.006226024368174876 Model is not converging. Current: 926.7893408786553 is not greater than 926.796599291579. Delta is -0.0072584129237611705 Model is not converging. Current: 913.9219497992952 is not greater than 913.9296641476728. Delta is -0.007714348377589886 Model is not converging. Current: 910.4012678712803 is not greater than 910.406751128743. Delta is -0.005483257462742586 Model is not converging. Current: 904.7339778157741 is not greater than 904.7369240473801. Delta is -0.0029462316059607474 Model is not converging. Current: 895.6552862172606 is not greater than 895.6626076718456. Delta is -0.007321454585053289 Model is not converging. Current: 890.3833132029898 is not greater than 890.3920564026263. Delta is -0.008743199636455756 Model is not converging. Current: 883.6239387285055 is not greater than 883.6265558304564. Delta is -0.0026171019508183235 Model is not converging. 
Current: 868.4684691390399 is not greater than 868.4732295631547. Delta is -0.004760424114806483 Model is not converging. Current: 864.6202427039213 is not greater than 864.6261842668736. Delta is -0.005941562952216373 Model is not converging. Current: 867.9676120760457 is not greater than 867.9722618350156. Delta is -0.004649758969890172 Model is not converging. Current: 866.8456577379745 is not greater than 866.8502222137438. Delta is -0.004564475769257115 Model is not converging. Current: 866.5303963557111 is not greater than 866.5350120031078. Delta is -0.004615647396690292 Model is not converging. Current: 866.318337333943 is not greater than 866.3236573842102. Delta is -0.005320050267187071 Model is not converging. Current: 863.4092686713299 is not greater than 863.4154556578433. Delta is -0.006186986513398551 Model is not converging. Current: 857.035615444409 is not greater than 857.0425915121772. Delta is -0.006976067768164285 Model is not converging. Current: 849.0641831086241 is not greater than 849.0678534163704. Delta is -0.0036703077463471345 Model is not converging. Current: 846.5767774036368 is not greater than 846.6210996489799. Delta is -0.044322245343096256 Model is not converging. Current: 762.1373723644891 is not greater than 762.1469870126668. Delta is -0.009614648177716845 Model is not converging. Current: 745.1315398590408 is not greater than 745.1359356929602. Delta is -0.004395833919375036 Model is not converging. Current: 760.1161165606073 is not greater than 760.1193172246923. Delta is -0.003200664084943128
=== KPI (native, pre-cost) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day
Equal-Weight 0.0602 0.0728 0.8267 -0.2053 0.0110
Static Mean-Var 0.0182 0.0098 1.8662 -0.0132 0.0014
IVP (Const-Vol RP, rolling) 0.0199 0.0118 1.6913 -0.0216 0.0016
PCA-HMM Hard-Regime 0.0217 0.0115 1.8841 -0.0129 0.0017
Saved → ./out1/kpi_pca_native.csv
=== KPI (risk-matched @ 10%) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day Alpha(scale)
Equal-Weight 0.0821 0.1000 0.8211 -0.2728 0.0151 1.3737
Static Mean-Var 0.1977 0.1000 1.9767 -0.1307 0.0146 10.2274
IVP (Const-Vol RP, rolling) 0.1771 0.1000 1.7707 -0.1717 0.0136 8.4931
PCA-HMM Hard-Regime 0.1995 0.1000 1.9952 -0.1076 0.0144 8.6858
Saved → ./out1/kpi_pca_matchedVol.csv
Saved net-of-costs grid → ./out1/kpi_pca_matchedVol_costGrid.csv
Saved turnover stats → ./out1/turnover_stats.csv
Saved subperiod KPIs → ./out1/kpi_pca_matchedVol_subperiods.csv
All outputs saved in: /Users/jeremy.duriez/Desktop/recherche2/out1
Prob-HMM defensive core + Momentum sleeve (K=3, 500d fit, refit 3d)¶
#!/usr/bin/env python3
# Model 2 — Hybrid: Prob-HMM defensive core + Momentum sleeve (FAIR VERSION)
# -------------------------------------------------------------------------
# Your requests:
# - Keep original (non-causal) regime weight estimation (full-sample by labels) ✅
# Kept fixes:
# - Constant-vol risk-parity baseline (IVP, rolling)
# - Risk-matched (10% ann vol) headline table + net-of-costs grid (5/10/25/50 bps)
# - Subperiod KPIs (2000–09 / 2010–19 / 2020–25)
# - ES95 (daily) metric across tables; HMM calibration (Brier, ECE)
# - Data manifest for reproducibility
# - SAFE turnover distribution plot (histogram/spike fallback; no SciPy KDE) ✅
# -------------------------------------------------------------------------
import os, glob, warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt
from datetime import datetime
warnings.filterwarnings("ignore")
# ---------------- Config
DATA_DIR="./data2"; OUT_DIR="./out2"  # input CSV folder / output artifact folder
K=3; ROLL_VOL_N=21; ROLL_FIT_N=500; REFIT_EVERY=3  # regimes; vol window; HMM fit window (days); refit cadence
RIDGE=1e-6; CLIP=0.5; SEED=7  # covariance ridge; per-asset weight cap; RNG seed
MOMO_LB=126; MOMO_MIN_OBS=90; SOFTMAX_TAU=3.0; IVOL_WIN=21  # momentum lookback / min obs / softmax temperature / inv-vol window
GAMMA_MAX=0.40  # max blend weight given to the momentum sleeve
# Fairness / reporting
TARGET_VOL = 0.10 # risk-matched target vol (annualized)
COST_BPS_GRID = [5,10,25,50]  # transaction-cost levels for the net-of-costs grid
IVP_WIN = 63 # inverse-vol lookback
IVP_REBAL = 21 # rebalance every N days
np.random.seed(SEED); os.makedirs(OUT_DIR, exist_ok=True)
from hmmlearn.hmm import GaussianHMM
# ---------------- IO / utils
def load_data_from_dir(path):
    """Load per-ticker CSVs from *path* into one wide close-price DataFrame.

    Each CSV must have a Date column (either spelling) plus one recognized
    close-price column; files with no usable close column are skipped.
    Frames are outer-merged on Date, sorted, forward-filled, and indexed
    by Date. Raises FileNotFoundError when no usable file is found.
    """
    files=sorted(glob.glob(os.path.join(path,"*.csv"))); frames=[]
    for f in files:
        # Ticker symbol is taken from the file name (e.g. spy.csv -> SPY).
        tkr=os.path.splitext(os.path.basename(f))[0].upper()
        df=pd.read_csv(f)
        if "Date" not in df.columns and "date" in df.columns: df=df.rename(columns={"date":"Date"})
        close_col=None
        # Accept several common spellings of the close-price column.
        for c in ["Close","Adj Close","close","adj_close","AdjClose"]:
            if c in df.columns: close_col=c; break
        if close_col is None: continue
        frames.append(df[["Date",close_col]].rename(columns={"Date":"Date", close_col:tkr}))
    if not frames: raise FileNotFoundError("No usable CSVs.")
    # Outer-merge so each ticker keeps its full history; ffill gaps afterwards.
    out=frames[0]
    for df in frames[1:]: out=out.merge(df,on="Date",how="outer")
    out["Date"]=pd.to_datetime(out["Date"]); out=out.sort_values("Date").set_index("Date").ffill().dropna(how="all")
    return out
def to_log_returns(P): return np.log(P/P.shift(1))
def ew_vol_feature(R, n=21):
    """Log of the rolling std of the equal-weight portfolio return.

    The tiny epsilon keeps the log finite when the rolling std is zero.
    Returns a Series named "x_ewvol" (first n-1 entries are NaN).
    """
    ew_ret = R.mean(axis=1)
    rolling_sd = ew_ret.rolling(n).std()
    feature = np.log(rolling_sd + 1e-12)
    return feature.rename("x_ewvol")
def mean_var_weights(mu, Sigma, ridge=1e-6, clip=0.5):
S=Sigma + ridge*np.eye(len(mu))
w=np.linalg.pinv(S) @ mu.reshape(-1,1); w=w.ravel()
w = w/np.abs(w).sum() if np.abs(w).sum()>0 else np.ones_like(w)/len(w)
w = np.clip(w, -clip, clip); w = w/np.abs(w).sum() if np.abs(w).sum()>0 else np.ones_like(w)/len(w)
return w
def ann_kpis_from_returns(rets, freq=252):
    """Annualized KPIs for a daily return series.

    Returns a dict with geometric annualized return, annualized vol,
    Sharpe (NaN if vol is zero), max drawdown, and daily ES95 (positive
    number = mean loss beyond the 5% quantile). All-NaN/empty input
    yields all-NaN KPIs.
    """
    clean = rets.dropna()
    if clean.empty:
        return dict(ret=np.nan, vol=np.nan, sharpe=np.nan, maxdd=np.nan, es95=np.nan)
    wealth = (1 + clean).cumprod()
    max_dd = (wealth / wealth.cummax() - 1).min()
    ann_ret = (1 + clean).prod() ** (freq / len(clean)) - 1
    ann_vol = clean.std() * np.sqrt(freq)
    sharpe = ann_ret / ann_vol if ann_vol > 0 else np.nan
    var_cut = np.quantile(clean, 0.05)
    tail = clean[clean <= var_cut]
    es95 = -tail.mean() if len(tail) else np.nan
    return dict(ret=ann_ret, vol=ann_vol, sharpe=sharpe, maxdd=max_dd, es95=es95)
def smooth_drawdown_from_returns(rets):
    """Wealth curve and drawdown series from returns (NaNs treated as 0)."""
    wealth = (1 + rets.fillna(0)).cumprod()
    running_peak = wealth.cummax()
    drawdown = wealth / running_peak - 1.0
    return wealth, drawdown
def softmax(x, tau=1.0):
    """NaN-safe temperature softmax; returns all zeros when total mass is 0.

    Subtracting the (nan-)max before exponentiating keeps the computation
    numerically stable; NaN entries contribute zero probability.
    """
    arr = np.asarray(x, dtype=float)
    shifted = arr - np.nanmax(arr)
    weights = np.exp(shifted / max(1e-12, tau))
    weights[np.isnan(weights)] = 0.0
    total = weights.sum()
    if total > 0:
        return weights / total
    return np.zeros_like(weights)
def inverse_vol_weights(R_window: pd.DataFrame):
    """Inverse-volatility weights over a return window (as an ndarray).

    Zero-vol assets are excluded (weight 0); if every asset drops out,
    falls back to equal weight across the window's columns.
    """
    sd = R_window.std().replace(0, np.nan)
    inv = (1.0 / sd).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    total = inv.sum()
    if total > 0:
        return (inv / total).values
    n_assets = len(R_window.columns)
    fallback = pd.Series(np.ones(n_assets) / n_assets, index=R_window.columns)
    return fallback.values
def turnover_series(W: pd.DataFrame):
    """One-way daily turnover: 0.5 * sum_i |w_i(t) - w_i(t-1)|.

    NaN weights are forward-filled (then zero-filled at the start of the
    sample) so a missing rebalance day does not register as a spurious
    trade. The first observation is defined as zero turnover.
    Returns a Series indexed like W.
    """
    # .ffill(): fillna(method="ffill") is deprecated and removed in pandas 3.0.
    Wf = W.ffill().fillna(0.0)
    dW = (Wf - Wf.shift(1)).abs().sum(axis=1) * 0.5
    if len(dW):  # guard: .iloc[0] would raise IndexError on an empty frame
        dW.iloc[0] = 0.0
    return dW
def apply_costs(gross: pd.Series, turnover: pd.Series, cost_bps: float):
    """Subtract linear transaction costs (cost_bps per unit of one-way turnover)."""
    per_unit_cost = cost_bps / 10000.0
    return gross - per_unit_cost * turnover
def risk_match(rets: pd.Series, target_vol=0.10, freq=252):
    """Scale a return series to an annualized target vol.

    Returns (scaled_returns, alpha) where alpha is the leverage applied;
    if realized vol is zero or non-finite, returns an unscaled copy with
    alpha = 1.0.
    """
    realized = rets.std() * np.sqrt(freq)
    if np.isfinite(realized) and realized > 0:
        alpha = target_vol / realized
        return rets * alpha, alpha
    return rets.copy(), 1.0
# ORIGINAL (non-causal) per-regime MV weights from full-sample labels
def regime_weights_from_labels(R, labels, ridge=1e-6, clip=0.5):
    """Per-regime mean-variance weights estimated from full-sample labels.

    Deliberately non-causal: each regime's mu/Sigma use every day the
    full-sample label assigns to that regime. Regimes with fewer than
    max(60, 5*N) observations fall back to equal weight. Returns
    {regime_id: weight ndarray}.
    """
    weights = {}
    N = R.shape[1]
    min_obs = max(60, N * 5)
    for k in sorted(labels.dropna().unique()):
        regime_days = labels.index[labels == k]
        if len(regime_days) < min_obs:
            weights[int(k)] = np.ones(N) / N
            continue
        sub = R.loc[regime_days]
        weights[int(k)] = mean_var_weights(sub.mean().values, sub.cov().values, ridge, clip)
    return weights
# ---------------- Data & feature
prices=load_data_from_dir(DATA_DIR)
R=to_log_returns(prices).dropna()
N=R.shape[1]; tickers=list(R.columns)
# Manifest (repro): per-ticker date range and observation count, saved to CSV.
manifest = pd.DataFrame({
    "Ticker": R.columns,
    "FirstDate": [R.index.min()]*N,
    "LastDate": [R.index.max()]*N,
    "Obs": [R[c].dropna().shape[0] for c in R.columns]
})
os.makedirs(OUT_DIR, exist_ok=True)
manifest.to_csv(os.path.join(OUT_DIR,"m2_manifest.csv"), index=False)
# Single HMM observable: log rolling vol of the equal-weight portfolio.
x=ew_vol_feature(R, ROLL_VOL_N).dropna()
# Align returns to the feature's valid dates (drops the warm-up window).
x_df=x.to_frame(); R_aligned=R.loc[x_df.index]; dates=x_df.index
print(f"Loaded {N} assets, {len(dates)} obs.")
# ---------------- Prob-HMM core (filtered & one-step-ahead)
# Rolling-window Gaussian HMM on the vol feature. For each date we store the
# hard Viterbi label, the filtered state probabilities, and the one-step-ahead
# probabilities alpha @ A (used later for causal weighting).
labels=pd.Series(index=dates, dtype=int)
filt_prob=pd.DataFrame(index=dates, columns=[f"p{k}" for k in range(K)], dtype=float)
ahead_prob=filt_prob.copy()
model=None; last_fit=None
for i, dt in enumerate(dates):
    # Refit every REFIT_EVERY days (and on the first usable date).
    refit=(last_fit is None) or ((i-last_fit)>=REFIT_EVERY)
    if refit:
        s=max(0,i-ROLL_FIT_N); X=x_df.iloc[s:i+1].values
        # Skip early dates until the window holds enough observations.
        if len(X)<max(120, ROLL_FIT_N//4): continue
        model=GaussianHMM(n_components=K, covariance_type='diag', n_iter=200, random_state=SEED); model.fit(X); last_fit=i
    # Decode on the same trailing window using the most recent model.
    s=max(0,i-ROLL_FIT_N); X=x_df.iloc[s:i+1].values
    path=model.predict(X); post=model.predict_proba(X)
    labels.iloc[i]=path[-1]; alpha=post[-1]; filt_prob.iloc[i]=alpha; ahead_prob.iloc[i]=alpha @ model.transmat_
# ---------------- Remap calm→mid→stress (by feature mean)
# HMM state ids are arbitrary per fit; relabel so state 0 has the lowest mean
# log-vol (calm) and state K-1 the highest (stress), and rename prob columns.
means=[(k, x_df.loc[labels==k,"x_ewvol"].mean()) for k in range(K)]
order=[k for (k,_) in sorted(means, key=lambda z:z[1])]
remap={old:new for new,old in enumerate(order)}
labels=labels.map(remap)
filt_prob=filt_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
ahead_prob=ahead_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
# ---------------- Defensive core weights (NON-CAUSAL, original)
# Core portfolio: probability-weighted mix of the per-regime MV weights,
# using one-step-ahead regime probabilities; equal weight until probs exist.
regime_w = regime_weights_from_labels(R_aligned, labels, RIDGE, CLIP)
W_core=pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for dt in W_core.index:
    p=ahead_prob.loc[dt]
    if p.isna().any(): W_core.loc[dt]=np.ones(N)/N; continue
    w=np.zeros(N)
    for k in range(K): w += float(p[f"p{k}"])*regime_w.get(k, np.ones(N)/N)
    # Cap each weight at ±CLIP and renormalize to unit gross exposure.
    w=np.clip(w, -CLIP, CLIP); ssum=np.abs(w).sum(); W_core.loc[dt]= w/(ssum if ssum>0 else 1.0)
# ---------------- Momentum sleeve (unchanged logic)
# Cross-sectional momentum: MOMO_LB-day return scaled by rolling vol,
# demeaned and standardized across assets each day.
rets=R_aligned.copy(); price_proxy=(1+rets).cumprod()
roll_vol=rets.rolling(IVOL_WIN).std()
roll_ret=price_proxy/price_proxy.shift(MOMO_LB) - 1.0
momo_score=(roll_ret/(roll_vol+1e-12)).replace([np.inf,-np.inf], np.nan).fillna(0.0)
momo_score=momo_score.sub(momo_score.mean(axis=1), axis=0)
momo_score=momo_score.div(momo_score.std(axis=1)+1e-12, axis=0)
inv_vol=1.0/(roll_vol+1e-12); inv_vol=inv_vol.div(inv_vol.sum(axis=1), axis=0).fillna(0.0)
W_sleeve=pd.DataFrame(index=rets.index, columns=rets.columns, dtype=float)
for dt in rets.index:
    srow=momo_score.loc[dt] if dt in momo_score.index else None
    vrow=inv_vol.loc[dt] if dt in inv_vol.index else None
    if srow is None or vrow is None or srow.isna().all() or vrow.isna().all():
        W_sleeve.loc[dt]=np.ones(N)/N; continue
    # Softmax over scores tilts toward winners; inv-vol scales position sizes.
    w_raw=softmax(srow.values, tau=SOFTMAX_TAU)
    w=w_raw * vrow.values
    w=w/np.abs(w).sum() if np.abs(w).sum()>0 else np.ones(N)/N
    W_sleeve.loc[dt]=w
# NOTE(review): fillna(method="ffill") is deprecated in pandas >= 2.1 (use .ffill()).
W_sleeve=W_sleeve.loc[W_core.index].fillna(method="ffill")
# ---------------- Regime-aware blend
# Sleeve allocation gamma shrinks toward 0 as the ahead stress prob rises.
p_stress=ahead_prob[f"p{K-1}"].reindex(W_core.index).fillna(method="ffill").clip(0,1)
gamma_t=GAMMA_MAX*(1 - p_stress)
W_final=pd.DataFrame(index=W_core.index, columns=W_core.columns, dtype=float)
for dt in W_core.index:
    g=float(gamma_t.loc[dt]) if dt in gamma_t.index else 0.0
    w=(1-g)*W_core.loc[dt].values + g*W_sleeve.loc[dt].values
    w=np.clip(w, -CLIP, CLIP); ssum=np.abs(w).sum(); W_final.loc[dt]= w/(ssum if ssum>0 else 1.0)
# ---------------- Daily returns & turnover
# Weights are lagged one day (shift(1)) so returns are out-of-sample w.r.t.
# the weight decision; turnover is one-way per turnover_series().
ret_core =(R_aligned * W_core.shift(1)).sum(axis=1).fillna(0.0)
ret_sleeve=(R_aligned * W_sleeve.shift(1)).sum(axis=1).fillna(0.0)
ret_final =(R_aligned * W_final.shift(1)).sum(axis=1).fillna(0.0)
turn_core = turnover_series(W_core)
turn_sleeve = turnover_series(W_sleeve)
turn_final = turnover_series(W_final)
# ---------------- Baselines
# Static MV: one full-sample mean-variance portfolio, held constant.
Sigma=R_aligned.cov().values; mu=R_aligned.mean().values
w_sta=mean_var_weights(mu, Sigma, ridge=RIDGE, clip=CLIP)
W_sta = pd.DataFrame([w_sta]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
# NOTE(review): ret_sta/ret_ew use un-lagged constant weights (no shift(1));
# harmless for constant portfolios since weights never change.
ret_sta=(R_aligned @ pd.Series(w_sta, index=R_aligned.columns)).fillna(0.0)
turn_sta = turnover_series(W_sta)
# Equal-Weight: constant 1/N portfolio.
w_ew=np.ones(N)/N
W_ew = pd.DataFrame([w_ew]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
ret_ew=(R_aligned @ pd.Series(w_ew, index=R_aligned.columns)).fillna(0.0)
turn_ew = turnover_series(W_ew)
# Constant-Vol Risk-Parity (IVP), rolling: inverse-vol weights re-estimated
# every IVP_REBAL days on a trailing IVP_WIN window; equal weight warm-up.
W_ivp = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for i, dt in enumerate(R_aligned.index):
    if i<IVP_WIN:
        W_ivp.iloc[i]=np.ones(N)/N
    else:
        if i % IVP_REBAL == 0 or i == IVP_WIN:
            # Window ends at i-1: only past returns feed the weights (causal).
            Rw=R_aligned.iloc[i-IVP_WIN:i]
            W_ivp.iloc[i]=inverse_vol_weights(Rw)
        else:
            W_ivp.iloc[i]=W_ivp.iloc[i-1]
# NOTE(review): fillna(method="ffill") is deprecated in pandas >= 2.1 (use .ffill()).
W_ivp=W_ivp.fillna(method="ffill").fillna(1.0/N)
ret_ivp=(R_aligned * W_ivp.shift(1)).sum(axis=1).fillna(0.0)
turn_ivp = turnover_series(W_ivp)
# ---------------- KPIs (native, pre-cost)
def krow(name, rets):
    """One KPI table row: [name, ann ret, ann vol, Sharpe, max DD, ES95]."""
    k=ann_kpis_from_returns(rets)
    return [name, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']]
kpi_native = pd.DataFrame(
    [krow("Equal-Weight", ret_ew),
     krow("Static MV", ret_sta),
     krow("IVP (Const-Vol RP, rolling)", ret_ivp),
     krow("HMM Core (prob)", ret_core),
     krow("Momentum Sleeve", ret_sleeve),
     krow("Hybrid Ensemble", ret_final)],
    columns=["Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"]
)
kpi_native.to_csv(os.path.join(OUT_DIR,"m2_kpi_native.csv"), index=False)
print("\n=== Model 2 KPIs (native, pre-cost) ===")
print(kpi_native.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
# ---------------- Risk-matched table (10% vol)
def matched_series_and_alpha(name, rets, turn):
    """Scale *rets* to TARGET_VOL; turnover is scaled by the same leverage alpha."""
    rm, alpha = risk_match(rets, TARGET_VOL)
    return name, rm, turn*alpha, alpha, ann_kpis_from_returns(rm)
# Accumulators: scaled returns, scaled turnover, and leverage per strategy.
matched = {}
turn_m = {}
alphas = {}
rows=[]
for name, rets, turn in [
    ("Equal-Weight", ret_ew, turn_ew),
    ("Static MV", ret_sta, turn_sta),
    ("IVP (Const-Vol RP, rolling)", ret_ivp, turn_ivp),
    ("HMM Core (prob)", ret_core, turn_core),
    ("Momentum Sleeve", ret_sleeve, turn_sleeve),
    ("Hybrid Ensemble", ret_final, turn_final),
]:
    nm, rm, tm, a, k = matched_series_and_alpha(name, rets, turn)
    matched[nm]=rm; turn_m[nm]=tm; alphas[nm]=a
    rows.append([nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95'], a])
kpi_matched = pd.DataFrame(rows, columns=["Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day","Alpha(scale)"])
kpi_matched.to_csv(os.path.join(OUT_DIR,"m2_kpi_matchedVol.csv"), index=False)
print("\n=== Model 2 KPIs (risk-matched @ {:.0f}%) ===".format(TARGET_VOL*100))
print(kpi_matched.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
# ---------------- Net-of-costs grid on risk-matched
def cost_grid(cost_list):
    """KPIs of each risk-matched strategy net of linear costs, per bps level."""
    out=[]
    for c in cost_list:
        for nm in matched:
            net = apply_costs(matched[nm], turn_m[nm], c)
            k = ann_kpis_from_returns(net)
            out.append([c, nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']])
    return pd.DataFrame(out, columns=["Cost_bps","Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"])
kpi_costs = cost_grid(COST_BPS_GRID)
kpi_costs.to_csv(os.path.join(OUT_DIR,"m2_kpi_matchedVol_costGrid.csv"), index=False)
# NOTE(review): printed path says "./out/" but the file is written under
# OUT_DIR ("./out2") — cosmetic message mismatch only.
print("\nSaved net-of-costs grid → ./out/m2_kpi_matchedVol_costGrid.csv")
# ---------------- Turnover stats
# Summary stats of scaled (risk-matched) one-way turnover per strategy.
turn_stats=[]
for nm, ts in turn_m.items():
    turn_stats.append([nm, ts.mean(), ts.median(), ts.quantile(0.9), ts.max()])
turn_df = pd.DataFrame(turn_stats, columns=["Strategy","Turnover_mean","Turnover_median","Turnover_p90","Turnover_max"])
turn_df.to_csv(os.path.join(OUT_DIR,"m2_turnover_stats.csv"), index=False)
# ---------------- Subperiod panels (risk-matched)
def subperiod_slices(idx):
    """Boolean masks for the 2000s / 2010s / 2020-to-end eras present in *idx*.

    Returns a list of (start, end, mask) tuples; eras with no dates in the
    index are dropped.
    """
    end_of_sample = str(idx.max().date())
    eras = [("2000-01-01", "2009-12-31"),
            ("2010-01-01", "2019-12-31"),
            ("2020-01-01", end_of_sample)]
    slices = []
    for start, end in eras:
        mask = (idx >= pd.Timestamp(start)) & (idx <= pd.Timestamp(end))
        if mask.any():
            slices.append((start, end, mask))
    return slices
# Build one KPI row per (era, strategy) from the risk-matched series.
subs=[]
for (s,e,mask) in subperiod_slices(R_aligned.index):
    for nm in matched:
        k=ann_kpis_from_returns(matched[nm].loc[mask])
        subs.append([f"{s}–{e}", nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']])
sub_df=pd.DataFrame(subs, columns=["Period","Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"])
sub_df.to_csv(os.path.join(OUT_DIR,"m2_kpi_matchedVol_subperiods.csv"), index=False)
# ---------------- HMM calibration (one-step-ahead)
# Compare ahead_prob(t) with realized label at t+1 (shift by 1)
probs = ahead_prob.copy().dropna()
y_true = labels.reindex(probs.index).shift(-1).dropna()
probs = probs.loc[y_true.index]
# Multiclass Brier score: mean squared distance between the predicted
# probability vector and the one-hot realized regime.
Y = np.zeros((len(y_true), K))
for i, y in enumerate(y_true.values.astype(int)):
    if 0 <= y < K: Y[i, y]=1.0
P = probs[[f"p{k}" for k in range(K)]].values
brier = np.mean(np.sum((P - Y)**2, axis=1))
# ECE (top-class, 10 equal-width bins). The last bin is closed on the right
# so predictions with confidence exactly 1.0 are counted — the original
# half-open binning (conf < b1 for every bin) silently dropped them.
conf = P.max(axis=1); pred = P.argmax(axis=1); acc = (pred==y_true.values.astype(int)).astype(float)
bins = np.linspace(0,1,11); ece=0.0
for b0, b1 in zip(bins[:-1], bins[1:]):
    idx = (conf>=b0) & ((conf<b1) | (b1>=1.0))
    if idx.any():
        ece += np.mean(idx) * abs(acc[idx].mean() - conf[idx].mean())
cal_df = pd.DataFrame({"Metric":["Brier","ECE_top10"], "Value":[brier, ece]})
cal_df.to_csv(os.path.join(OUT_DIR,"m2_hmm_calibration.csv"), index=False)
# ---------------- SAFE turnover distribution (no SciPy KDE)
def plot_turnover_distribution(turn_map, out_path):
    """Plot each strategy's turnover distribution as a density-normalized
    histogram outline; (near-)constant series fall back to a dashed
    vertical "spike" line instead of a degenerate histogram."""
    fig, ax = plt.subplots(figsize=(10,4))
    for name, ts in turn_map.items():
        s = pd.Series(ts).replace([np.inf, -np.inf], np.nan).dropna()
        s = s[np.isfinite(s)]
        # Degenerate cases: empty, zero variance, or fewer than 3 distinct values.
        if (len(s) == 0) or (s.std() <= 1e-12) or (s.nunique() < 3):
            val = float(s.iloc[0]) if len(s) else 0.0
            ax.axvline(val, linestyle="--", linewidth=1.5, label=f"{name} (spike)")
        else:
            counts, bins = np.histogram(s, bins=40, density=True)
            centers = 0.5*(bins[1:] + bins[:-1])
            ax.plot(centers, counts, linewidth=1.5, label=name)
    ax.set_title("Turnover distribution (one-way, risk-matched)")
    ax.grid(True); ax.legend()
    fig.tight_layout(); fig.savefig(out_path, dpi=180)
plot_turnover_distribution(turn_m, os.path.join(OUT_DIR,"m2_fig_turnover_density.png"))
# ---------------- Figures (standardized, comparable)
# Risk-matched log cumulative wealth (pre-cost)
plt.figure(figsize=(10,4))
for nm, sr in matched.items():
curve=(1+sr).cumprod()
plt.plot(curve.index, np.log(curve), label=nm)
plt.title("Log Cumulative Wealth — Risk-matched (10% vol)"); plt.legend(); plt.grid(True, alpha=.3); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m2_fig_cum_wealth_matched.png"), dpi=180)
# Risk-matched drawdowns
def ddplot_from_rets(name, rets):
    """Save a drawdown plot for a (risk-matched) daily return series.

    Parameters
    ----------
    name : str, strategy label used in the title and the output file name.
    rets : pd.Series of daily simple returns.
    """
    curve, dd = smooth_drawdown_from_returns(rets)
    fig = plt.figure(figsize=(10,3)); plt.plot(dd)
    plt.title(f"Drawdown (risk-matched) — {name}"); plt.grid(True, alpha=.3); plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, f"m2_fig_drawdown_matched_{name.replace(' ','_').replace('-','')}.png"), dpi=180)
    plt.close(fig)  # FIX: called once per strategy in a loop — close to avoid figure build-up
# Risk-matched drawdown panel, one figure per strategy.
for nm, sr in matched.items():
    ddplot_from_rets(nm, sr)
# Native wealth (pre-cost)
curve_core =(1+ret_core).cumprod()
curve_sleeve=(1+ret_sleeve).cumprod()
curve_final =(1+ret_final).cumprod()
curve_sta =(1+ret_sta).cumprod()
curve_ew =(1+ret_ew).cumprod()
plt.figure(figsize=(10,4))
plt.plot(np.log(curve_final), label="Hybrid Ensemble")
plt.plot(np.log(curve_core), label="HMM Core (prob)")
plt.plot(np.log(curve_sleeve), label="Momentum Sleeve")
plt.plot(np.log(curve_sta), label="Static MV")
plt.plot(np.log(curve_ew), label="Equal-Weight")
plt.title("Log Cumulative Wealth — Model 2 (native)"); plt.legend(); plt.grid(True, alpha=.3); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m2_cum_wealth_native.png"), dpi=180)
# Prob ribbon: stacked one-step-ahead regime probabilities.
cols=[f"p{k}" for k in range(K)]
pdf=ahead_prob[cols].dropna(how="any")
plt.figure(figsize=(12,3.6))
plt.stackplot(pdf.index, pdf.values.T, labels=cols, alpha=0.9)
plt.ylim(0,1); plt.title("One-step-ahead Probabilities — Model 2"); plt.legend(ncol=K, fontsize=8, frameon=False)
plt.grid(True, alpha=.2); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m2_prob_ribbon.png"), dpi=180)
# Weights timelines
# FIX: fillna(method="ffill") is deprecated (removed in pandas 3.0) — use .ffill().
plt.figure(figsize=(11,4))
plt.stackplot(W_core.index, W_core.ffill().values.T, labels=tickers)
plt.title("Portfolio Weights — HMM Core (prob)"); plt.legend(loc="upper left", ncol=min(N,4), fontsize=8)
plt.tight_layout(); plt.savefig(os.path.join(OUT_DIR,"m2_weights_core.png"), dpi=180)
plt.figure(figsize=(11,4))
plt.stackplot(W_sleeve.index, W_sleeve.ffill().values.T, labels=tickers)
plt.title("Portfolio Weights — Momentum Sleeve"); plt.legend(loc="upper left", ncol=min(N,4), fontsize=8)
plt.tight_layout(); plt.savefig(os.path.join(OUT_DIR,"m2_weights_sleeve.png"), dpi=180)
plt.figure(figsize=(11,4))
plt.stackplot(W_final.index, W_final.ffill().values.T, labels=tickers)
plt.title("Portfolio Weights — Hybrid Ensemble"); plt.legend(loc="upper left", ncol=min(N,4), fontsize=8)
plt.tight_layout(); plt.savefig(os.path.join(OUT_DIR,"m2_weights_final.png"), dpi=180)
# Sleeve intensity: momentum exposure shrinks as stress probability rises.
plt.figure(figsize=(10,3))
plt.plot(gamma_t.index, gamma_t.values)
plt.title(r"Sleeve Intensity $\gamma_t$ = GAMMA_MAX · (1 − P_{stress})"); plt.grid(True, alpha=.3); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m2_sleeve_intensity.png"), dpi=180)
# Momentum heatmap (z-scores): assets on rows, time on columns.
momo_sub=momo_score.loc[W_core.index].copy(); Z=momo_sub.values.T
fig, ax = plt.subplots(figsize=(12, 3 + 0.12*N))
im=ax.imshow(Z, aspect="auto", cmap="viridis", vmin=-2.5, vmax=2.5)
ax.set_yticks(range(N)); ax.set_yticklabels(tickers)
xt=np.linspace(0, Z.shape[1]-1, 8, dtype=int)
ax.set_xticks(xt); ax.set_xticklabels([momo_sub.index[i].strftime("%Y") for i in xt])
fig.colorbar(im, ax=ax, fraction=0.025, pad=0.02).set_label("Momentum z-score")
ax.set_title("Momentum Heatmap — Model 2"); fig.tight_layout()
fig.savefig(os.path.join(OUT_DIR,"m2_momo_heatmap.png"), dpi=180)
print(f"\nFigures & tables saved in: {os.path.abspath(OUT_DIR)}")
Loaded 13 assets, 3404 obs.
Model is not converging. Current: 354.1968677568538 is not greater than 354.1970358707842. Delta is -0.00016811393038551614 Model is not converging. Current: 354.429513131319 is not greater than 354.43020147967803. Delta is -0.0006883483590058859 Model is not converging. Current: 235.14949633390336 is not greater than 235.14983045425902. Delta is -0.0003341203556601613 Model is not converging. Current: 241.04640901288684 is not greater than 241.0471493121839. Delta is -0.0007402992970639843 Model is not converging. Current: 219.34526619659013 is not greater than 219.34556335749306. Delta is -0.0002971609029316369 Model is not converging. Current: 214.49601436229398 is not greater than 214.49621822328905. Delta is -0.00020386099507163635 Model is not converging. Current: 249.09560366422386 is not greater than 249.09585198536337. Delta is -0.0002483211395087892 Model is not converging. Current: 233.03566808977445 is not greater than 233.03633397871312. Delta is -0.0006658889386699229 Model is not converging. Current: 12.730394609057656 is not greater than 12.7305804804049. Delta is -0.0001858713472433493 Model is not converging. Current: -4.112930001313416 is not greater than -4.112706239098814. Delta is -0.0002237622146017415 Model is not converging. Current: -4.287934729930699 is not greater than -4.287668253373226. Delta is -0.00026647655747247256 Model is not converging. Current: -5.089796482525532 is not greater than -5.089565140890337. Delta is -0.00023134163519511475 Model is not converging. Current: -6.456079962763253 is not greater than -6.455907125397283. Delta is -0.0001728373659704019 Model is not converging. Current: -7.8114834109455025 is not greater than -7.811381662566985. Delta is -0.00010174837851728569 Model is not converging. Current: 17.548726420763575 is not greater than 17.549236372431018. Delta is -0.0005099516674427207 Model is not converging. Current: 78.56205558779848 is not greater than 78.56212918975174. 
Delta is -7.360195326100438e-05 Model is not converging. Current: 166.48253361906458 is not greater than 166.48298203453913. Delta is -0.00044841547455121145 Model is not converging. Current: 172.88710270855572 is not greater than 172.8881344222597. Delta is -0.0010317137039805857 Model is not converging. Current: 160.4287719262974 is not greater than 160.43127858962117. Delta is -0.002506663323771363
=== Model 2 KPIs (native, pre-cost) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day
Equal-Weight 0.0582 0.0725 0.8030 -0.2053 0.0109
Static MV 0.0179 0.0096 1.8577 -0.0135 0.0014
IVP (Const-Vol RP, rolling) 0.0171 0.0120 1.4249 -0.0306 0.0017
HMM Core (prob) 0.0239 0.0135 1.7693 -0.0306 0.0018
Momentum Sleeve 0.0162 0.0087 1.8514 -0.0184 0.0012
Hybrid Ensemble 0.0198 0.0087 2.2634 -0.0119 0.0012
=== Model 2 KPIs (risk-matched @ 10%) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day Alpha(scale)
Equal-Weight 0.0797 0.1000 0.7968 -0.2738 0.0150 1.3790
Static MV 0.1967 0.1000 1.9669 -0.1354 0.0144 10.3920
IVP (Const-Vol RP, rolling) 0.1467 0.1000 1.4666 -0.2306 0.0142 8.3263
HMM Core (prob) 0.1859 0.1000 1.8590 -0.2077 0.0135 7.4043
Momentum Sleeve 0.1961 0.1000 1.9607 -0.1948 0.0140 11.4303
Hybrid Ensemble 0.2454 0.1000 2.4544 -0.1332 0.0141 11.4413
Saved net-of-costs grid → ./out/m2_kpi_matchedVol_costGrid.csv
Figures & tables saved in: /Users/jeremy.duriez/Desktop/recherche2/out2
Model 3 — RA-HRP: Prob-HMM on features + per-regime HRP allocation¶
#!/usr/bin/env python3
# Model 3 — RA-HRP: Prob-HMM on features + per-regime HRP allocation (FAIR VERSION)
# -------------------------------------------------------------------------
# Kept core:
# - Per-regime HRP weights blended by one-step-ahead HMM probs (your original)
# Additions:
# - HRP on mixture covariance Σ̄_t = Σ_s p_s(t) (exact alternative to prob-blend)
# - Constant-vol Risk-Parity (IVP) baseline (rolling)
# - Risk-matched (10% ann vol) headline table + net-of-costs grid (5/10/25/50 bps)
# - Subperiod KPIs (2000–09 / 2010–19 / 2020–*)
# - ES95 (daily) metric, turnover stats, safe turnover plot (no SciPy KDE)
# - HMM calibration (Brier, ECE) and data manifest
# FIXES:
# - "Static HRP" now FAIR (expanding window + monthly rebalance + shrinkage; no look-ahead)
# - Portfolio returns computed as simple returns from weighted log-returns
# -------------------------------------------------------------------------
# Model 3 (RA-HRP) — imports & configuration.
import os, glob, warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
# ---------- Config
DATA_DIR="./data2"; OUT_DIR="./out3"
# K HMM regimes; EW-vol feature window; rolling HMM fit length; refit cadence (days).
K=3; ROLL_VOL_N=21; ROLL_FIT_N=500; REFIT_EVERY=3
# Absolute per-asset weight cap; RNG seed for reproducibility.
CLIP=0.5; SEED=7
TARGET_VOL = 0.10 # risk-matched target annual vol
COST_BPS_GRID = [5,10,25,50] # bps per one-way turnover
IVP_WIN=63; IVP_REBAL=21 # IVP baseline params
LABEL_HRP = "Static HRP" # keep legacy name for comparability
np.random.seed(SEED); os.makedirs(OUT_DIR, exist_ok=True)
from hmmlearn.hmm import GaussianHMM
# --- Shrinkage (optional, with fallback) ---
# LedoitWolf stays None when sklearn is absent; shrink_cov_matrix then
# falls back to the sample covariance.
try:
    from sklearn.covariance import LedoitWolf
except Exception:
    LedoitWolf = None
# ---------- IO / core utils
def load_data_from_dir(path):
    """Load every *.csv in *path* into a single Date-indexed price panel.

    Each file contributes one close-price column named after the file stem
    (upper-cased); files without a recognised close column are skipped.
    The merged panel is sorted, forward-filled, and all-NaN rows dropped.
    Raises FileNotFoundError when no usable file is found.
    """
    candidate_cols = ("Close", "Adj Close", "close", "adj_close", "AdjClose")
    frames = []
    for csv_path in sorted(glob.glob(os.path.join(path, "*.csv"))):
        ticker = os.path.splitext(os.path.basename(csv_path))[0].upper()
        raw = pd.read_csv(csv_path)
        if "Date" not in raw.columns and "date" in raw.columns:
            raw = raw.rename(columns={"date": "Date"})
        close_col = next((c for c in candidate_cols if c in raw.columns), None)
        if close_col is not None:
            frames.append(raw[["Date", close_col]].rename(columns={close_col: ticker}))
    if not frames:
        raise FileNotFoundError("No usable CSVs.")
    panel = frames[0]
    for extra in frames[1:]:
        panel = panel.merge(extra, on="Date", how="outer")
    panel["Date"] = pd.to_datetime(panel["Date"])
    return panel.sort_values("Date").set_index("Date").ffill().dropna(how="all")
def to_log_returns(P):
    """Daily log-returns of a price panel; the first row is NaN."""
    ratio = P / P.shift(1)
    return np.log(ratio)
def ew_vol_feature(R, n=21):
    """Log of the n-day rolling std of the equal-weight portfolio return.

    A tiny epsilon keeps the log finite when the rolling std is zero.
    Returns a Series named "x_ewvol" (first n-1 entries are NaN).
    """
    ew_ret = R.mean(axis=1)
    rolling_sd = ew_ret.rolling(n).std()
    return np.log(rolling_sd + 1e-12).rename("x_ewvol")
def pca_pc1_var_feature(R, win=63):
    """Log share of total variance explained by PC1 over a rolling window.

    For each full *win*-day window with no missing rows, the covariance
    eigenvalues are computed and log(lambda_max / trace) is recorded at
    the window's last date. Windows containing NaNs are skipped.
    Returns a Series named "x_pc1".
    """
    from numpy.linalg import eigh
    values, stamps = [], []
    for end in range(win - 1, len(R)):
        window = R.iloc[end - win + 1:end + 1].dropna(how="any")
        if len(window) < win:
            continue
        evals, _ = eigh(window.cov().values)
        top = float(evals[-1])
        total = float(evals.sum()) + 1e-12
        values.append(np.log(top / total + 1e-12))
        stamps.append(R.index[end])
    return pd.Series(values, index=stamps, name="x_pc1")
def ann_kpis_from_returns(rets, freq=252):
rets = pd.Series(rets).dropna()
if rets.empty: return dict(ret=np.nan, vol=np.nan, sharpe=np.nan, maxdd=np.nan, es95=np.nan)
curve=(1+rets).cumprod()
maxdd=(curve/curve.cummax()-1).min()
ann_ret=(1+rets).prod()**(freq/len(rets)) - 1
ann_vol=rets.std()*np.sqrt(freq)
sharpe=ann_ret/ann_vol if ann_vol>0 else np.nan
q=np.quantile(rets, 0.05)
es95 = -rets[rets<=q].mean() if (rets<=q).any() else np.nan
return dict(ret=ann_ret, vol=ann_vol, sharpe=sharpe, maxdd=maxdd, es95=es95)
def smooth_drawdown_from_returns(rets):
    """Return (wealth curve, drawdown series); NaN returns count as zero."""
    wealth = (1 + pd.Series(rets).fillna(0)).cumprod()
    drawdown = wealth / wealth.cummax() - 1.0
    return wealth, drawdown
# ---------- HRP helpers
def inverse_variance_portfolio(Sigma):
    """Inverse-variance weights from covariance *Sigma* (sum to 1).

    Diagonal entries are floored at 1e-12 to avoid division by zero.
    """
    variances = np.diag(Sigma).clip(1e-12, None)
    inv = 1.0 / variances
    return inv / inv.sum()
def _corr_from_cov(Sigma):
std=np.sqrt(np.diag(Sigma)).clip(1e-12,None)
return Sigma/np.outer(std,std)
def _seriation_greedy(Corr):
N=Corr.shape[0]; remaining=list(range(N))
start=int(np.argmax(Corr.mean(axis=1))); order=[start]; remaining.remove(start)
while remaining:
last=order[-1]; nxt=max(remaining, key=lambda j: Corr[last,j])
order.append(nxt); remaining.remove(nxt)
return order
def hrp_weights(Sigma):
    """HRP-style weights from a covariance matrix.

    Assets are ordered by greedy seriation of the implied correlation
    matrix, then recursively bisected; at each split capital is allocated
    inversely to the two halves' cluster variances (w' S w with
    inverse-variance weights within each half). Final weights are clipped
    at 0 and L1-normalized.
    """
    Corr = _corr_from_cov(Sigma)
    order = _seriation_greedy(Corr)
    N = Sigma.shape[0]
    w = np.ones(N)
    clusters = [order]
    while clusters:
        new = []
        for cl in clusters:
            if len(cl) <= 1:
                continue
            m = len(cl) // 2
            c1, c2 = cl[:m], cl[m:]
            S1 = Sigma[np.ix_(c1, c1)]
            S2 = Sigma[np.ix_(c2, c2)]
            # FIX: removed dead "... if False else ..." debug expressions
            # that obscured the intended cluster variance w' S w.
            w1 = inverse_variance_portfolio(S1)
            v1 = float(w1 @ S1 @ w1)
            w2 = inverse_variance_portfolio(S2)
            v2 = float(w2 @ S2 @ w2)
            a = 1.0 - v1 / (v1 + v2) if (v1 + v2) > 0 else 0.5
            w[c1] *= a
            w[c2] *= (1.0 - a)
            if len(c1) > 1: new.append(c1)
            if len(c2) > 1: new.append(c2)
        clusters = new
    w = np.clip(w, 0, None)
    s = np.abs(w).sum()
    return w / (s if s > 0 else 1.0)
def inverse_vol_weights(R_window: pd.DataFrame):
    """Inverse-volatility weights (ndarray) from a return window.

    Assets with zero or non-finite vol get zero weight; if no asset has a
    usable vol, falls back to equal weight.
    """
    sigma = R_window.std().replace(0, np.nan)
    raw = (1.0 / sigma).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    total = raw.sum()
    if total > 0:
        return (raw / total).values
    n_assets = len(R_window.columns)
    return np.full(n_assets, 1.0 / n_assets)
def turnover_series(W: pd.DataFrame):
    """One-way daily turnover 0.5 * sum_i |w_i(t) - w_i(t-1)|.

    Weights are forward-filled (then 0-filled) before differencing; the
    first observation is defined as zero turnover.
    """
    # FIX: fillna(method="ffill") is deprecated (removed in pandas 3.0);
    # .ffill() is the identical modern spelling.
    Wf = W.ffill().fillna(0.0)
    dW = (Wf - Wf.shift(1)).abs().sum(axis=1) * 0.5
    dW.iloc[0] = 0.0
    return dW
def apply_costs(gross: pd.Series, turnover: pd.Series, cost_bps: float):
    """Net returns after linear costs of *cost_bps* per unit of one-way turnover."""
    per_unit = cost_bps / 10000.0
    return gross - per_unit * turnover
def risk_match(rets: pd.Series, target_vol=0.10, freq=252):
    """Scale *rets* so its annualized vol equals *target_vol*.

    Returns (scaled series, scale factor). When realized vol is zero or
    non-finite, a copy of the input is returned with factor 1.0.
    """
    realized = rets.std() * np.sqrt(freq)
    if not np.isfinite(realized) or realized <= 0:
        return rets.copy(), 1.0
    scale = target_vol / realized
    return rets * scale, scale
def shrink_cov_matrix(R_window: pd.DataFrame):
    """Robust covariance estimate of a return window.

    Uses Ledoit-Wolf shrinkage when sklearn is available and the window
    has comfortably more rows than columns; otherwise (or on failure) the
    sample covariance. Non-finite entries are zeroed and the result is
    projected to PSD by flooring eigenvalues at 1e-8.
    """
    X = R_window.values
    use_lw = LedoitWolf is not None and X.shape[0] > X.shape[1] + 2
    if use_lw:
        try:
            S = LedoitWolf().fit(X).covariance_
        except Exception:
            S = R_window.cov().values
    else:
        S = R_window.cov().values
    S = np.asarray(S, float)
    S[~np.isfinite(S)] = 0.0
    eigvals, eigvecs = np.linalg.eigh(S)
    eigvals = np.clip(eigvals, 1e-8, None)
    return (eigvecs * eigvals) @ eigvecs.T
# ---------- Data & features (two-feature HMM like paper)
prices=load_data_from_dir(DATA_DIR)
R=to_log_returns(prices).dropna()
N=R.shape[1]; tickers=list(R.columns)
# Manifest (repro): per-asset date coverage and observation counts.
manifest = pd.DataFrame({
    "Ticker": R.columns,
    "FirstDate": [R.index.min()]*N,
    "LastDate": [R.index.max()]*N,
    "Obs": [R[c].dropna().shape[0] for c in R.columns]
})
manifest.to_csv(os.path.join(OUT_DIR, "m3_manifest.csv"), index=False)
# Feature 1: log rolling EW-portfolio volatility (ROLL_VOL_N days).
x1=ew_vol_feature(R, n=ROLL_VOL_N)
# Feature 2: log PC1 variance share.
# NOTE(review): the function default (and sibling scripts' headers) use a
# 63d PC1 window, but win=ROLL_VOL_N passes 21 here — confirm intended.
x2=pca_pc1_var_feature(R, win=ROLL_VOL_N)
x_df=pd.concat([x1,x2], axis=1).dropna()
# Align returns to the feature index (features need warm-up windows).
R_aligned=R.loc[x_df.index]; dates=x_df.index
print(f"Loaded {N} assets, {len(dates)} obs, features={list(x_df.columns)}")
# ---------- Prob-HMM on features
# Rolling-window Gaussian HMM: refit every REFIT_EVERY days on the last
# ROLL_FIT_N feature rows; between refits the last fitted model is reused.
# NOTE(review): constructing an int-dtype Series without data relies on
# pandas-version behavior for the unset (early) entries — confirm.
labels=pd.Series(index=dates, dtype=int)
filt_prob=pd.DataFrame(index=dates, columns=[f"p{k}" for k in range(K)], dtype=float)
ahead_prob=filt_prob.copy()
model=None; last_fit=None
for i, dt in enumerate(dates):
    refit=(last_fit is None) or ((i-last_fit)>=REFIT_EVERY)
    if refit:
        s=max(0,i-ROLL_FIT_N); X=x_df.iloc[s:i+1].values
        # Skip early dates until enough history accumulates; labels/probs
        # for those dates stay unset.
        if len(X)<max(120, ROLL_FIT_N//4): continue
        model=GaussianHMM(n_components=K, covariance_type='diag', n_iter=200, random_state=SEED); model.fit(X); last_fit=i
    s=max(0,i-ROLL_FIT_N); X=x_df.iloc[s:i+1].values
    # Today's Viterbi label, filtered posterior alpha, and one-step-ahead
    # distribution alpha @ A (the quantity the allocator consumes).
    path=model.predict(X); post=model.predict_proba(X)
    labels.iloc[i]=path[-1]; alpha=post[-1]; filt_prob.iloc[i]=alpha; ahead_prob.iloc[i]=alpha @ model.transmat_
# ---------- Remap calm→mid→stress
# Order raw HMM state ids by a stress proxy (z-scored EW vol + z-scored
# PC1 share) so regime 0 = calm, ..., K-1 = stress, consistently across
# refits. NOTE: z-scores use full-sample means/stds (non-causal by design,
# as stated in the script header).
z1=(x_df["x_ewvol"]-x_df["x_ewvol"].mean())/(x_df["x_ewvol"].std()+1e-12)
z2=(x_df["x_pc1"] -x_df["x_pc1"].mean()) /(x_df["x_pc1"].std() +1e-12)
stress=z1+z2
means=[(k, stress[labels==k].mean()) for k in range(K)]
order=[k for (k,_) in sorted(means, key=lambda z:z[1])]
remap={old:new for new,old in enumerate(order)}
labels=labels.map(remap)
filt_prob=filt_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
ahead_prob=ahead_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
# ---------- Per-regime HRP weights (NON-CAUSAL, original)
def regime_hrp_weights(R, labels):
    """Per-regime HRP weights from the full-sample regime labelling.

    NOTE: uses the entire label history (non-causal, per the original
    paper setup). Regimes with fewer than max(60, 5*N_assets) days fall
    back to equal weight. Weights are clipped to +/-CLIP and
    L1-normalized. Returns {regime id: ndarray of weights}.
    """
    weights_by_regime = {}
    min_obs = max(60, R.shape[1] * 5)
    for regime in sorted(labels.dropna().unique()):
        days = labels.index[labels == regime]
        if len(days) < min_obs:
            weights_by_regime[int(regime)] = np.ones(R.shape[1]) / R.shape[1]
            continue
        Sigma = R.loc[days].cov().values
        try:
            w = hrp_weights(Sigma)
        except Exception:
            w = inverse_variance_portfolio(Sigma)
        w = np.clip(w, -CLIP, CLIP)
        norm = np.abs(w).sum()
        weights_by_regime[int(regime)] = w / (norm if norm > 0 else 1.0)
    return weights_by_regime
# Per-regime HRP weights fitted on full-sample labels (non-causal, original spec).
regime_w=regime_hrp_weights(R_aligned, labels)
# ---------- Also precompute per-regime covariance matrices (for Σ̄)
# Regimes with too few days fall back to a diagonal (variance-only) matrix.
regime_Sigma={}
for k in sorted(labels.dropna().unique()):
    idx=labels.index[labels==k]
    if len(idx) < max(60, R.shape[1]*5):
        regime_Sigma[int(k)] = np.diag(R_aligned.var().values)
    else:
        regime_Sigma[int(k)] = R_aligned.loc[idx].cov().values
# ---------- Returns helper: weighted log-returns → simple returns
def port_simple_returns_from_log(R_log: pd.DataFrame, W: pd.DataFrame):
    """Portfolio simple returns from asset log-returns and daily weights.

    Weights are lagged one day (positions set at the prior close); the
    weighted log-return is converted to a simple return via expm1. Days
    without lagged weights contribute 0.
    """
    lagged_W = W.shift(1)
    weighted_log = (R_log * lagged_W).sum(axis=1).fillna(0.0)
    return np.expm1(weighted_log)
# ---------- Portfolios
# (A) Original RA-HRP: prob-blend of per-regime HRP weights
# Daily weights = sum_k p_k(t) * w_HRP(regime k), clipped to +/-CLIP and
# L1-normalized; equal weight on warm-up days with missing probabilities.
W_prob=pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for dt in W_prob.index:
    p=ahead_prob.loc[dt]
    if p.isna().any(): W_prob.loc[dt]=np.ones(N)/N; continue
    w=np.zeros(N)
    for k in range(K): w += float(p[f"p{k}"])*regime_w.get(k, np.ones(N)/N)
    w=np.clip(w, -CLIP, CLIP); s=np.abs(w).sum(); W_prob.loc[dt]= w/(s if s>0 else 1.0)
# (B) Mixture-covariance HRP: HRP on Σ̄_t = Σ_s p_s(t)
# Blend covariances first, then run HRP once per day on the mixture.
W_mix=pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for dt in W_mix.index:
    p=ahead_prob.loc[dt]
    if p.isna().any(): W_mix.loc[dt]=np.ones(N)/N; continue
    Sbar=np.zeros((N,N))
    for k in range(K):
        Sbar += float(p[f"p{k}"]) * regime_Sigma.get(k, np.diag(R_aligned.var().values))
    try: w = hrp_weights(Sbar)
    except Exception: w = inverse_variance_portfolio(Sbar)
    w=np.clip(w, -CLIP, CLIP); s=np.abs(w).sum(); W_mix.loc[dt]= w/(s if s>0 else 1.0)
# Daily simple returns & turnover
ret_prob = port_simple_returns_from_log(R_aligned, W_prob)
ret_mix = port_simple_returns_from_log(R_aligned, W_mix)
turn_prob = turnover_series(W_prob)
turn_mix = turnover_series(W_mix)
# ---------- Baselines
# Equal-Weight (constant): 1/N every day, zero turnover after day one.
w_ew=np.ones(N)/N
W_ew=pd.DataFrame([w_ew]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
ret_ew=port_simple_returns_from_log(R_aligned, W_ew)
turn_ew=turnover_series(W_ew)
# "Static HRP" (FAIR): expanding window, monthly rebalance, shrinkage (no look-ahead)
HRP_MIN_WIN = max(252, ROLL_FIT_N) # at least ~1y or your model fit length
HRP_REBAL = 21 # ~monthly
W_hrp = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
last_w = np.ones(N)/N  # equal weight until enough history accrues
for i, dt in enumerate(R_aligned.index):
    if i < HRP_MIN_WIN:
        W_hrp.iloc[i] = last_w
        continue
    # Re-estimate on the first eligible day and then roughly monthly.
    if (i == HRP_MIN_WIN) or (i % HRP_REBAL == 0):
        Rw = R_aligned.iloc[:i] # EXPANDING window up to t-1 (no look-ahead)
        Sigma_hat = shrink_cov_matrix(Rw)
        try:
            w = hrp_weights(Sigma_hat)
        except Exception:
            w = inverse_variance_portfolio(Sigma_hat)
        w = np.clip(w, -CLIP, CLIP)
        s = np.abs(w).sum()
        last_w = w / (s if s > 0 else 1.0)
    W_hrp.iloc[i] = last_w
# FIX: fillna(method="ffill") is deprecated (removed in pandas 3.0) — use .ffill().
W_hrp = W_hrp.ffill().fillna(1.0/N)
ret_hrp = port_simple_returns_from_log(R_aligned, W_hrp)
turn_hrp = turnover_series(W_hrp)
# Constant-Vol Risk-Parity (IVP), rolling: inverse-vol weights from the
# trailing IVP_WIN days (up to t-1), rebalanced every IVP_REBAL days.
W_ivp = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for i, dt in enumerate(R_aligned.index):
    if i<IVP_WIN:
        W_ivp.iloc[i]=np.ones(N)/N  # equal weight during warm-up
    else:
        if i % IVP_REBAL == 0 or i == IVP_WIN:
            Rw=R_aligned.iloc[i-IVP_WIN:i]  # trailing window, no look-ahead
            W_ivp.iloc[i]=inverse_vol_weights(Rw)
        else:
            W_ivp.iloc[i]=W_ivp.iloc[i-1]  # hold between rebalances
# FIX: fillna(method="ffill") is deprecated (removed in pandas 3.0) — use .ffill().
W_ivp=W_ivp.ffill().fillna(1.0/N)
ret_ivp=port_simple_returns_from_log(R_aligned, W_ivp)
turn_ivp=turnover_series(W_ivp)
# ---------- KPIs (native, pre-cost)
def krow(name, rets):
    """One KPI table row: [name, ann ret, ann vol, sharpe, maxdd, es95]."""
    kpis = ann_kpis_from_returns(rets)
    return [name] + [kpis[key] for key in ("ret", "vol", "sharpe", "maxdd", "es95")]
# Assemble and persist the native (unlevered, pre-cost) KPI table.
kpi_native = pd.DataFrame(
    [krow("Equal-Weight", ret_ew),
     krow(LABEL_HRP, ret_hrp), # keep label "Static HRP"
     krow("IVP (Const-Vol RP, rolling)", ret_ivp),
     krow("RA-HRP (prob-blend)", ret_prob),
     krow("RA-HRP (mixture-cov HRP)", ret_mix)],
    columns=["Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"]
)
kpi_native.to_csv(os.path.join(OUT_DIR,"m3_kpi_native.csv"), index=False)
print("\n=== Model 3 KPIs (native, pre-cost) ===")
print(kpi_native.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
# ---------- Risk-matched table (10% vol)
def matched_series_and_alpha(name, rets, turn):
    """Risk-match one strategy to TARGET_VOL.

    Turnover is scaled by the same leverage factor so costs stay
    consistent. Returns (name, matched returns, scaled turnover, alpha,
    KPI dict of the matched series).
    """
    scaled, alpha = risk_match(rets, TARGET_VOL)
    return name, scaled, turn * alpha, alpha, ann_kpis_from_returns(scaled)
# Risk-match every strategy to TARGET_VOL; keep the scaled return series,
# the correspondingly scaled turnover, and the scale factor (alpha).
matched = {}
turn_m = {}
alphas = {}
rows=[]
for name, rets, turn in [
    ("Equal-Weight", ret_ew, turn_ew),
    (LABEL_HRP, ret_hrp, turn_hrp), # keep label "Static HRP"
    ("IVP (Const-Vol RP, rolling)", ret_ivp, turn_ivp),
    ("RA-HRP (prob-blend)", ret_prob, turn_prob),
    ("RA-HRP (mixture-cov HRP)", ret_mix, turn_mix),
]:
    nm, rm, tm, a, k = matched_series_and_alpha(name, rets, turn)
    matched[nm]=rm; turn_m[nm]=tm; alphas[nm]=a
    rows.append([nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95'], a])
kpi_matched = pd.DataFrame(rows, columns=["Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day","Alpha(scale)"])
kpi_matched.to_csv(os.path.join(OUT_DIR,"m3_kpi_matchedVol.csv"), index=False)
print("\n=== Model 3 KPIs (risk-matched @ {:.0f}%) ===".format(TARGET_VOL*100))
print(kpi_matched.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
# ---------- Net-of-costs grid on risk-matched
def cost_grid(cost_list):
    """KPI table for each (cost level, strategy) pair.

    Applies linear transaction costs (bps per unit one-way turnover) to
    the module-level risk-matched series and recomputes the KPIs.
    """
    rows = []
    for bps in cost_list:
        for strat in matched:
            net = apply_costs(matched[strat], turn_m[strat], bps)
            kp = ann_kpis_from_returns(net)
            rows.append([bps, strat, kp['ret'], kp['vol'], kp['sharpe'], kp['maxdd'], kp['es95']])
    return pd.DataFrame(rows, columns=["Cost_bps","Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"])
kpi_costs = cost_grid(COST_BPS_GRID)
kpi_costs.to_csv(os.path.join(OUT_DIR,"m3_kpi_matchedVol_costGrid.csv"), index=False)
# FIX: the message previously hard-coded "./out/..." while the file is
# actually written under OUT_DIR ("./out3") — report the real path.
print("\nSaved net-of-costs grid → " + os.path.join(OUT_DIR, "m3_kpi_matchedVol_costGrid.csv"))
# ---------- Turnover stats
# Summary statistics of the (risk-match-scaled) daily one-way turnover.
turn_stats=[]
for nm, ts in turn_m.items():
    turn_stats.append([nm, ts.mean(), ts.median(), ts.quantile(0.9), ts.max()])
turn_df = pd.DataFrame(turn_stats, columns=["Strategy","Turnover_mean","Turnover_median","Turnover_p90","Turnover_max"])
turn_df.to_csv(os.path.join(OUT_DIR,"m3_turnover_stats.csv"), index=False)
# ---------- Subperiod panels (risk-matched)
def subperiod_slices(idx):
    """Boolean masks over *idx* for the three reporting eras.

    Eras: 2000-09, 2010-19, and 2020 through the end of the sample
    (idx.max()). Eras with no observations are dropped. Returns a list of
    (start, end, mask) tuples.
    """
    eras = [("2000-01-01", "2009-12-31"),
            ("2010-01-01", "2019-12-31"),
            ("2020-01-01", str(idx.max().date()))]
    slices = []
    for start, end in eras:
        mask = (idx >= pd.Timestamp(start)) & (idx <= pd.Timestamp(end))
        if mask.any():
            slices.append((start, end, mask))
    return slices
# Per-era KPI panel computed on the risk-matched return series.
subs=[]
for (s,e,mask) in subperiod_slices(R_aligned.index):
    for nm in matched:
        k=ann_kpis_from_returns(matched[nm].loc[mask])
        subs.append([f"{s}–{e}", nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']])
sub_df=pd.DataFrame(subs, columns=["Period","Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"])
sub_df.to_csv(os.path.join(OUT_DIR,"m3_kpi_matchedVol_subperiods.csv"), index=False)
# ---------- HMM calibration (one-step-ahead)
# Compare tomorrow's forecast distribution with the realised label:
# multiclass Brier score plus top-class expected calibration error (10 bins).
probs = ahead_prob.copy().dropna()
y_true = labels.reindex(probs.index).shift(-1).dropna()
probs = probs.loc[y_true.index]
# One-hot encode the realised next-day regime.
Y = np.zeros((len(y_true), K))
for i, y in enumerate(y_true.values.astype(int)):
    if 0 <= y < K: Y[i, y]=1.0
P = probs[[f"p{k}" for k in range(K)]].values
brier = np.mean(np.sum((P - Y)**2, axis=1))
conf = P.max(axis=1); pred = P.argmax(axis=1); acc = (pred==y_true.values.astype(int)).astype(float)
bins = np.linspace(0,1,11); ece=0.0
for b0, b1 in zip(bins[:-1], bins[1:]):
    idx = (conf>=b0) & (conf<b1)
    if idx.any():
        # Bin weight times |empirical accuracy − mean confidence|.
        ece += np.mean(idx) * abs(acc[idx].mean() - conf[idx].mean())
pd.DataFrame({"Metric":["Brier","ECE_top10"], "Value":[brier, ece]}).to_csv(os.path.join(OUT_DIR,"m3_hmm_calibration.csv"), index=False)
# ---------- SAFE turnover distribution (no SciPy KDE)
def plot_turnover_distribution(turn_map, out_path):
    """Plot the distribution of one-way turnover for each strategy.

    Degenerate series (empty, near-constant, or with fewer than 3 distinct
    values) are drawn as a dashed vertical "spike" instead of a density,
    so the figure never needs SciPy's KDE (which fails on such inputs).

    Parameters
    ----------
    turn_map : dict mapping strategy name -> daily one-way turnover series.
    out_path : str, destination PNG path.
    """
    fig, ax = plt.subplots(figsize=(10,4))
    for name, ts in turn_map.items():
        s = pd.Series(ts).replace([np.inf, -np.inf], np.nan).dropna()
        s = s[np.isfinite(s)]
        if (len(s) == 0) or (s.std() <= 1e-12) or (s.nunique() < 3):
            # Constant/empty turnover: mark its (single) value with a spike.
            val = float(s.iloc[0]) if len(s) else 0.0
            ax.axvline(val, linestyle="--", linewidth=1.5, label=f"{name} (spike)")
        else:
            # Histogram-based density estimate (no SciPy dependency).
            counts, bins = np.histogram(s, bins=40, density=True)
            centers = 0.5*(bins[1:] + bins[:-1])
            ax.plot(centers, counts, linewidth=1.5, label=name)
    ax.set_title("Turnover distribution (one-way, risk-matched)")
    ax.grid(True); ax.legend()
    fig.tight_layout(); fig.savefig(out_path, dpi=180)
    plt.close(fig)  # FIX: release the figure — avoids leaking open figures
# Save the SAFE turnover plot for the risk-matched strategies.
plot_turnover_distribution(turn_m, os.path.join(OUT_DIR,"m3_fig_turnover_density.png"))
# ---------- Figures (standardized)
# Risk-matched log cumulative wealth (pre-cost)
plt.figure(figsize=(10,4))
for nm, sr in matched.items():
    curve=(1+sr).cumprod()
    plt.plot(curve.index, np.log(curve), label=nm)
plt.title("Log Cumulative Wealth — Risk-matched (10% vol)"); plt.legend(); plt.grid(True, alpha=.3); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m3_fig_cum_wealth_matched.png"), dpi=180)
# Risk-matched drawdowns
def ddplot_from_rets(name, rets):
    """Save a drawdown plot for a (risk-matched) daily return series.

    Parameters
    ----------
    name : str, strategy label used in the title and the output file name.
    rets : pd.Series of daily simple returns.
    """
    curve, dd = smooth_drawdown_from_returns(rets)
    fig = plt.figure(figsize=(10,3)); plt.plot(dd)
    plt.title(f"Drawdown (risk-matched) — {name}"); plt.grid(True, alpha=.3); plt.tight_layout()
    plt.savefig(os.path.join(OUT_DIR, f"m3_fig_drawdown_matched_{name.replace(' ','_').replace('-','')}.png"), dpi=180)
    plt.close(fig)  # FIX: called once per strategy in a loop — close to avoid figure build-up
# Risk-matched drawdown panel, one figure per strategy.
for nm, sr in matched.items():
    ddplot_from_rets(nm, sr)
# Native wealth (pre-cost)
curve_prob=(1+ret_prob).cumprod(); curve_mix=(1+ret_mix).cumprod()
curve_hrp =(1+ret_hrp ).cumprod(); curve_ew=(1+ret_ew).cumprod()
plt.figure(figsize=(10,4))
plt.plot(np.log(curve_prob), label="RA-HRP (prob-blend)")
plt.plot(np.log(curve_mix), label="RA-HRP (mixture-cov HRP)")
plt.plot(np.log(curve_hrp), label=LABEL_HRP) # keep label "Static HRP"
plt.plot(np.log(curve_ew), label="Equal-Weight")
plt.title("Log Cumulative Wealth — Model 3 (native)"); plt.legend(); plt.grid(True, alpha=.3); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m3_cum_wealth_native.png"), dpi=180)
# One-step-ahead probability ribbon
cols=[f"p{k}" for k in range(K)]
pdf=ahead_prob[cols].dropna(how="any")
plt.figure(figsize=(12,3.6))
plt.stackplot(pdf.index, pdf.values.T, labels=cols, alpha=0.9)
plt.ylim(0,1); plt.title("One-step-ahead Probabilities — Model 3"); plt.legend(ncol=K, fontsize=8, frameon=False)
plt.grid(True, alpha=.2); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"m3_prob_ribbon.png"), dpi=180)
# Weights timeline (prob-blend portfolio)
# FIX: fillna(method="ffill") is deprecated (removed in pandas 3.0) — use .ffill().
plt.figure(figsize=(11,4))
plt.stackplot(W_prob.index, W_prob.ffill().values.T, labels=tickers)
plt.title("Portfolio Weights — RA-HRP (prob-blend)"); plt.legend(loc="upper left", ncol=min(N,4), fontsize=8)
plt.tight_layout(); plt.savefig(os.path.join(OUT_DIR,"m3_weights_prob_blend.png"), dpi=180)
# ---------- Methods note for reproducibility (kept label "Static HRP")
pd.DataFrame({
    "Strategy": [LABEL_HRP],
    "Method": ["Expanding window, monthly rebalance, Ledoit–Wolf shrinkage, PSD; no look-ahead"]
}).to_csv(os.path.join(OUT_DIR, "m3_method_notes.csv"), index=False)
print(f"\nFigures & tables saved in: {os.path.abspath(OUT_DIR)}")
Model is not converging. Current: 234.56608948292705 is not greater than 234.56963579667996. Delta is -0.0035463137529063715 Model is not converging. Current: 327.100573376318 is not greater than 327.10142199921546. Delta is -0.0008486228974788901
Loaded 13 assets, 3404 obs, features=['x_ewvol', 'x_pc1']
Model is not converging. Current: 427.3183204804763 is not greater than 427.32060493028047. Delta is -0.0022844498041649786 Model is not converging. Current: 428.2749629210059 is not greater than 428.2769174246335. Delta is -0.0019545036275872008 Model is not converging. Current: 509.2483583552393 is not greater than 509.24885750700827. Delta is -0.0004991517689632019 Model is not converging. Current: 512.7352541672615 is not greater than 512.7356723270244. Delta is -0.0004181597629440148 Model is not converging. Current: 516.4769012624967 is not greater than 516.4782103939042. Delta is -0.0013091314075381888 Model is not converging. Current: 475.9218608214578 is not greater than 475.9243333213066. Delta is -0.0024724998488068195 Model is not converging. Current: 543.2106435761854 is not greater than 543.2143222719205. Delta is -0.003678695735175097 Model is not converging. Current: 612.2601918751456 is not greater than 612.2668734441668. Delta is -0.006681569021225187 Model is not converging. Current: 606.8076075184647 is not greater than 606.8176610287069. Delta is -0.010053510242187258 Model is not converging. Current: 601.3155875455163 is not greater than 601.321173603596. Delta is -0.005586058079643408 Model is not converging. Current: 595.530505498803 is not greater than 595.5430063848964. Delta is -0.012500886093448571 Model is not converging. Current: 595.4187198374369 is not greater than 595.4430166505438. Delta is -0.02429681310684373 Model is not converging. Current: 780.4332295672039 is not greater than 780.442983818365. Delta is -0.009754251161098182 Model is not converging. Current: 783.4636436217628 is not greater than 783.4762710259783. Delta is -0.012627404215550087 Model is not converging. Current: 661.4717871464429 is not greater than 661.480579846008. Delta is -0.008792699565105977 Model is not converging. Current: 430.485271683611 is not greater than 430.48576104116967. Delta is -0.0004893575586493171 Model is not converging. 
Current: 414.9587845668064 is not greater than 414.95993299754883. Delta is -0.001148430742432538 Model is not converging. Current: 439.9638217529458 is not greater than 439.96421214238666. Delta is -0.00039038944083813476 Model is not converging. Current: 697.6740478631006 is not greater than 697.7169317616757. Delta is -0.0428838985751554 Model is not converging. Current: 713.3632991821065 is not greater than 713.3677052139906. Delta is -0.004406031884059303 Model is not converging. Current: 702.9921798514354 is not greater than 702.9967879565751. Delta is -0.004608105139709551 Model is not converging. Current: 794.1431466841034 is not greater than 794.1446038038556. Delta is -0.0014571197522172952 Model is not converging. Current: 794.1723698299093 is not greater than 794.1813470361395. Delta is -0.008977206230269985 Model is not converging. Current: 793.4660239450867 is not greater than 793.4720438828839. Delta is -0.006019937797191233 Model is not converging. Current: 794.8513919590821 is not greater than 794.8717084851203. Delta is -0.020316526038186566 Model is not converging. Current: 885.1221199364642 is not greater than 885.1643689885387. Delta is -0.04224905207456686 Model is not converging. Current: 890.1465025362984 is not greater than 890.1794216747718. Delta is -0.03291913847340311 Model is not converging. Current: 893.5227171578038 is not greater than 893.537117674615. Delta is -0.014400516811292619 Model is not converging. Current: 911.049662573319 is not greater than 911.0583039213041. Delta is -0.008641347985076209 Model is not converging. Current: 789.4202157468898 is not greater than 789.4224889140556. Delta is -0.0022731671658675623 Model is not converging. Current: 761.2092350754426 is not greater than 761.2118681294932. Delta is -0.002633054050534156 Model is not converging. Current: 766.7599430281199 is not greater than 766.7630284363755. Delta is -0.003085408255628863
=== Model 3 KPIs (native, pre-cost) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day
Equal-Weight 0.0609 0.0725 0.8405 -0.2009 0.0108
Static HRP 0.0176 0.0198 0.8869 -0.0569 0.0031
IVP (Const-Vol RP, rolling) 0.0172 0.0120 1.4308 -0.0306 0.0017
RA-HRP (prob-blend) 0.0140 0.0097 1.4419 -0.0306 0.0011
RA-HRP (mixture-cov HRP) 0.0148 0.0096 1.5317 -0.0306 0.0010
=== Model 3 KPIs (risk-matched @ 10%) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day Alpha(scale)
Equal-Weight 0.0835 0.1000 0.8351 -0.2683 0.0150 1.3802
Static HRP 0.0875 0.1000 0.8750 -0.2628 0.0155 5.0511
IVP (Const-Vol RP, rolling) 0.1473 0.1000 1.4733 -0.2303 0.0142 8.3250
RA-HRP (prob-blend) 0.1488 0.1000 1.4877 -0.2777 0.0113 10.3164
RA-HRP (mixture-cov HRP) 0.1590 0.1000 1.5898 -0.2793 0.0108 10.3824
Saved net-of-costs grid → ./out/m3_kpi_matchedVol_costGrid.csv
Figures & tables saved in: /Users/jeremy.duriez/Desktop/recherche2/out3
IA + HMM (Neural prior fused with HMM)¶
#!/usr/bin/env python3
# Test 8 — IA + HMM (Neural Prior fused with HMM one-step probabilities) — FAIR VERSION
# --------------------------------------------------------------------------------------
# Core unchanged:
# - K=3, ROLL_FIT_N=500, REFIT_EVERY=3
# - Features x = [log(EW vol, 21d), log(PC1 variance share, 63d)]
# - HMM on x; Tiny MLP learns P_NN(s_t|x_t) on last train window; Fusion: tempered PoE
# - Allocation: probabilistic blend of per-regime MV weights from returns
#
# Fairness/robustness (like Models 1/2/3):
# - Add IVP baseline, ES95 metric, risk-matched (10% vol) headline table
# - Net-of-costs grid (5/10/25/50 bps), subperiod KPIs, turnover stats
# - SAFE turnover density (no SciPy KDE)
# - HMM calibration (Brier/ECE) on fused one-step-ahead probs
# - Data manifest
#
# 10% pb fixed for Static MV:
# - Static MV weights trained once on initial window (no look-ahead), then held
# - When risk-matching Static MV: cap leverage (ALPHA_CAP_STATIC) and use vol floor
# --------------------------------------------------------------------------------------
import os, glob, warnings
warnings.filterwarnings("ignore")
import numpy as np, pandas as pd, matplotlib.pyplot as plt
# ---------------- Config ----------------
DATA_DIR = "./data2"    # folder of per-ticker price CSVs
OUT_DIR = "./out4"      # destination for all tables and figures
K = 3                   # number of HMM regimes
ROLL_VOL_N = 21         # window (days) for the EW-portfolio vol feature
ROLL_PC_WIN = 63        # window (days) for the PC1 variance-share feature
ROLL_FIT_N = 500        # HMM/NN rolling training window (days)
REFIT_EVERY = 3         # refit cadence (days)
RIDGE = 1e-6            # ridge added to covariances in MV weights
CLIP = 0.5              # per-asset absolute weight cap
SEED = 7
# IA (NN)
FUSE_BETA = 0.40        # tempering weight of the NN expert in the product-of-experts fusion
NN_HIDDEN = 16
NN_EPOCHS = 80
NN_LR = 5e-3
NN_WEIGHT_DECAY = 1e-4
NN_DROPOUT = 0.10
# Fairness/reporting
TARGET_VOL = 0.10       # annualized target vol for the risk-matched table
COST_BPS_GRID = [5,10,25,50]
IVP_WIN = 63            # lookback for the inverse-vol baseline
IVP_REBAL = 21          # IVP rebalance cadence (trading days)
# Static baseline safeguards (fixes the 10% vol-matching problem)
STATIC_TRAIN_WIN = 500
ALPHA_CAP_STATIC = 5.0  # <= 5x scaling max when risk-matching static
VOL_FLOOR_STATIC = 0.02 # 2% annual vol floor for static risk-matching
np.random.seed(SEED)
os.makedirs(OUT_DIR, exist_ok=True)
# ------------ Imports (hmmlearn + torch) -------------
try:
    from hmmlearn.hmm import GaussianHMM
except Exception as e:
    raise SystemExit("pip install hmmlearn\nOriginal error: %r" % e)
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
except Exception as e:
    raise SystemExit("pip install torch\nOriginal error: %r" % e)
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
torch.manual_seed(SEED)
# ---------------- Helpers ----------------
def load_data_from_dir(path):
    """Load per-ticker CSVs from *path* and merge them into one price frame.

    Each file must have a Date (or date) column plus one of several
    recognized close-price columns; the ticker is taken from the file name.
    Returns a Date-indexed DataFrame, forward-filled, all-NaN rows dropped.
    """
    csv_paths = sorted(glob.glob(os.path.join(path, "*.csv")))
    if not csv_paths:
        raise FileNotFoundError(f"No CSVs found in {path}")
    pieces = []
    for fp in csv_paths:
        ticker = os.path.splitext(os.path.basename(fp))[0].upper()
        raw = pd.read_csv(fp)
        if "Date" not in raw.columns and "date" in raw.columns:
            raw.rename(columns={"date": "Date"}, inplace=True)
        price_col = next(
            (c for c in ["Close", "Adj Close", "close", "adj_close", "AdjClose"]
             if c in raw.columns),
            None,
        )
        if price_col is None:
            continue  # no usable price column in this file
        raw = raw[["Date", price_col]].rename(columns={price_col: ticker})
        raw["Date"] = pd.to_datetime(raw["Date"])
        pieces.append(raw)
    if not pieces:
        raise ValueError("No usable files with Close/Adj Close.")
    merged = pieces[0]
    for piece in pieces[1:]:
        merged = merged.merge(piece, on="Date", how="outer")
    merged.sort_values("Date", inplace=True)
    merged.set_index("Date", inplace=True)
    return merged.ffill().dropna(how="all")
def to_log_returns(P):
    """Convert a price frame *P* to daily log returns (first row is NaN)."""
    return np.log(P / P.shift(1))
def ew_vol_feature(R, n=21):
    """Log rolling volatility (window *n*) of the equal-weight portfolio of R."""
    ew_ret = R.mean(axis=1)
    rolling_sd = ew_ret.rolling(n).std()
    return np.log(rolling_sd + 1e-12).rename("x_ewvol")
def pca_pc1_var_feature(R, win=63):
    """Log share of total variance explained by the first principal
    component, computed from a rolling *win*-day covariance of R."""
    from numpy.linalg import eigh
    values, stamps = [], []
    for end in range(win - 1, len(R)):
        window = R.iloc[end - win + 1:end + 1].dropna(how="any")
        if len(window) < win:
            continue  # incomplete window after NaN removal
        evals, _ = eigh(window.cov().values)  # eigenvalues, ascending
        top = float(evals[-1])
        total = float(evals.sum()) + 1e-12
        values.append(np.log(top / total + 1e-12))
        stamps.append(R.index[end])
    return pd.Series(values, index=stamps, name="x_pc1")
def ann_kpis_from_returns(rets, freq=252):
    """Annualized KPIs for a daily return series (NaNs dropped first).

    Returns a dict: CAGR (`ret`), annualized vol, Sharpe, max drawdown,
    and daily ES95 (mean loss beyond the 5% quantile, sign-flipped).
    """
    clean = pd.Series(rets).dropna()
    if clean.empty:
        return dict(ret=np.nan, vol=np.nan, sharpe=np.nan, maxdd=np.nan, es95=np.nan)
    wealth = (1 + clean).cumprod()
    max_dd = (wealth / wealth.cummax() - 1).min()
    cagr = (1 + clean).prod() ** (freq / len(clean)) - 1
    vol_ann = clean.std() * np.sqrt(freq)
    shp = cagr / vol_ann if vol_ann > 0 else np.nan
    tail_cut = np.quantile(clean, 0.05)
    in_tail = clean <= tail_cut
    es = -clean[in_tail].mean() if in_tail.any() else np.nan
    return dict(ret=cagr, vol=vol_ann, sharpe=shp, maxdd=max_dd, es95=es)
def smooth_drawdown_from_returns(rets):
    """Return (wealth curve, drawdown series) from daily returns; NaNs -> 0."""
    wealth = (1 + pd.Series(rets).fillna(0)).cumprod()
    drawdown = wealth / wealth.cummax() - 1.0
    return wealth, drawdown
def mean_var_weights(mu, Sigma, ridge=1e-6, clip=0.5):
    """Mean-variance weights proportional to (Sigma + ridge*I)^-1 mu,
    L1-normalized, clipped to [-clip, clip], then renormalized.
    Falls back to equal weight when the gross exposure is zero."""
    dim = len(mu)
    raw = np.linalg.pinv(Sigma + ridge * np.eye(dim)).dot(mu.reshape(-1, 1)).ravel()
    gross = np.abs(raw).sum()
    w = raw / gross if gross > 0 else np.ones(dim) / dim
    w = np.clip(w, -clip, clip)
    gross = np.abs(w).sum()
    return w / gross if gross > 0 else np.ones(dim) / dim
def inverse_vol_weights(R_window: pd.DataFrame):
    """Inverse-volatility weights over the columns of *R_window* (sum to 1);
    falls back to equal weight when no asset has a usable volatility."""
    sigma = R_window.std().replace(0, np.nan)
    inv = (1.0 / sigma).replace([np.inf, -np.inf], np.nan).fillna(0.0)
    total = inv.sum()
    if total > 0:
        return (inv / total).values
    n_assets = len(R_window.columns)
    return pd.Series(np.ones(n_assets) / n_assets, index=R_window.columns).values
def turnover_series(W: pd.DataFrame):
    """One-way daily turnover: 0.5 * sum_i |w_{i,t} - w_{i,t-1}|.

    Weights are forward-filled (then zero-filled) so gaps in W do not
    register as spurious trades; the first observation is defined as 0.
    """
    # .ffill(): fillna(method=...) is deprecated and removed in pandas 3.x.
    Wf = W.ffill().fillna(0.0)
    dW = (Wf - Wf.shift(1)).abs().sum(axis=1) * 0.5
    if len(dW):  # guard: an empty frame has no first row to zero out
        dW.iloc[0] = 0.0
    return dW
def apply_costs(gross: pd.Series, turnover: pd.Series, cost_bps: float):
    """Net returns after linear trading costs of *cost_bps* per unit turnover."""
    rate = cost_bps / 10000.0
    return gross - rate * turnover
def risk_match(rets: pd.Series, target_vol=0.10, freq=252, alpha_max=np.inf, vol_floor=0.0):
    """Scale *rets* so annualized vol hits *target_vol*; returns (scaled, alpha).

    Realized vol is floored at *vol_floor* before scaling and the leverage
    alpha is capped at *alpha_max*. Degenerate vol -> series returned as-is.
    """
    realized = rets.std() * np.sqrt(freq)
    if not np.isfinite(realized) or realized <= 0:
        return rets.copy(), 1.0
    alpha = target_vol / max(realized, vol_floor)
    if np.isfinite(alpha_max):
        alpha = min(alpha, alpha_max)
    return rets * alpha, alpha
# ---------------- IA (tiny MLP) ----------------
class TinyMLP(nn.Module):
    """Small two-hidden-layer MLP producing raw class logits."""

    def __init__(self, in_dim, out_dim, hid=16, p=0.1):
        super().__init__()
        layers = [
            nn.Linear(in_dim, hid), nn.ReLU(), nn.Dropout(p),
            nn.Linear(hid, hid), nn.ReLU(), nn.Dropout(p),
            nn.Linear(hid, out_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        # Returns unnormalized logits; callers apply softmax as needed.
        return self.net(x)
@torch.no_grad()
def nn_predict_proba(model, X):
    """Softmax class probabilities of *model* on array X (eval mode, no grad)."""
    model.eval()
    tensor_in = torch.as_tensor(X, dtype=torch.float32, device=DEVICE)
    return torch.softmax(model(tensor_in), dim=-1).cpu().numpy()
def nn_train(model, X, y, epochs=80, lr=5e-3, wd=1e-4):
    """Fit *model* to (X, y) with full-batch Adam + cross-entropy for *epochs*."""
    model.train()
    features = torch.as_tensor(X, dtype=torch.float32, device=DEVICE)
    targets = torch.as_tensor(y, dtype=torch.long, device=DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    criterion = nn.CrossEntropyLoss()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(features), targets)
        loss.backward()
        optimizer.step()
# ---------------- Load data & features ----------------
prices = load_data_from_dir(DATA_DIR)
R = to_log_returns(prices).dropna()
N = R.shape[1]
# Two HMM features: log EW-portfolio vol (21d) and log PC1 variance share (63d).
x_ew = ew_vol_feature(R, n=ROLL_VOL_N)
x_pc = pca_pc1_var_feature(R, win=ROLL_PC_WIN)
x_df = pd.concat([x_ew, x_pc], axis=1).dropna()
# Restrict returns to dates where both features exist.
R_aligned = R.loc[x_df.index]
dates = x_df.index
tickers = list(R_aligned.columns)
print(f"Loaded {N} assets; feature rows={len(dates)}; features={list(x_df.columns)}")
# Manifest: per-ticker coverage, for reproducibility of the data set.
manifest = pd.DataFrame({
    "Ticker": R_aligned.columns,
    "FirstDate": [R_aligned.index.min()]*N,
    "LastDate": [R_aligned.index.max()]*N,
    "Obs": [R_aligned[c].dropna().shape[0] for c in R_aligned.columns]
})
manifest.to_csv(os.path.join(OUT_DIR, "t8_manifest.csv"), index=False)
# ---------------- Rolling IA+HMM ----------------
labels = pd.Series(index=dates, dtype=int)
filt_prob = pd.DataFrame(index=dates, columns=[f"p{k}" for k in range(K)], dtype=float)
ahead_prob = filt_prob.copy()
fused_prob = filt_prob.copy()
model_hmm = None
last_fit = None
# IA model (2→K)
nn_model = TinyMLP(in_dim=x_df.shape[1], out_dim=K, hid=NN_HIDDEN, p=NN_DROPOUT).to(DEVICE)
for i, dt in enumerate(dates):
    refit = (last_fit is None) or ((i - last_fit) >= REFIT_EVERY)
    # HMM (train / refit on last 500 obs of features)
    if refit:
        start_fit = max(0, i - ROLL_FIT_N)
        X_fit = x_df.iloc[start_fit:i+1].values
        if len(X_fit) < max(120, ROLL_FIT_N//4):
            continue  # warm-up: not enough history to fit yet
        model_hmm = GaussianHMM(n_components=K, covariance_type='diag', n_iter=200, random_state=SEED)
        model_hmm.fit(X_fit)
        last_fit = i
        # Obtain teacher labels over the training window for NN (Viterbi path)
        path_fit = model_hmm.predict(X_fit)
        # Train NN to predict s_t from x_t
        try:
            nn_train(nn_model, X_fit, path_fit, epochs=NN_EPOCHS, lr=NN_LR, wd=NN_WEIGHT_DECAY)
        except Exception as e:
            print("NN training skipped due to:", e)
    # Inference on rolling window (for probabilities)
    start = max(0, i - ROLL_FIT_N)
    X_win = x_df.iloc[start:i+1].values
    if len(X_win) < 2:
        continue
    path = model_hmm.predict(X_win)
    post = model_hmm.predict_proba(X_win)
    labels.iloc[i] = path[-1]  # hard label for day i (last point of Viterbi path)
    alpha_t = post[-1]  # filtered P(s_t | x_1..t)
    filt_prob.iloc[i] = alpha_t
    p_hmm_next = alpha_t @ model_hmm.transmat_  # HMM one-step-ahead forecast
    ahead_prob.iloc[i] = p_hmm_next
    # IA prior for s_{t+1}: predict s_t from x_t, then push through A
    p_nn_t = nn_predict_proba(nn_model, X_win[-1:].copy())[0]  # P_NN(s_t | x_t)
    p_nn_next = p_nn_t @ model_hmm.transmat_
    # Fuse (tempered product of experts)
    eps = 1e-12
    fused = (p_hmm_next + eps)**(1.0 - FUSE_BETA) * (p_nn_next + eps)**(FUSE_BETA)
    fused = fused / fused.sum()
    fused_prob.iloc[i] = fused
# ---- Remap states calm→mid→stress by feature stress score
# Stress score = sum of z-scored features; states are reordered so that
# label 0 = calmest and label K-1 = most stressed, for stable reporting.
stress_score = (x_df["x_ewvol"] - x_df["x_ewvol"].mean())/(x_df["x_ewvol"].std()+1e-12) + \
               (x_df["x_pc1"] - x_df["x_pc1"].mean()) /(x_df["x_pc1"].std() +1e-12)
means = [(k, stress_score[labels==k].mean()) for k in range(K)]
order = [k for (k, _) in sorted(means, key=lambda z:z[1])]
remap = {old:new for new, old in enumerate(order)}
labels = labels.map(remap)
filt_prob = filt_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
ahead_prob = ahead_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
fused_prob = fused_prob.rename(columns={f"p{old}":f"p{remap[old]}" for old in range(K)})
# ---------------- Per-regime weights (MV, NON-CAUSAL, like others) ----------------
def regime_weights_from_labels(R, labels, ridge=1e-6, clip=0.5):
    """Per-regime MV weights estimated from all days carrying each label.

    NOTE: uses the full-sample labels (non-causal by design, like the
    companion models). Regimes with too few observations fall back to 1/N.
    """
    n_assets = R.shape[1]
    weights = {}
    for regime in sorted(labels.dropna().unique()):
        days = labels.index[labels == regime]
        if len(days) < max(60, n_assets * 5):
            weights[int(regime)] = np.ones(n_assets) / n_assets
            continue
        sub = R.loc[days]
        weights[int(regime)] = mean_var_weights(sub.mean().values, sub.cov().values,
                                                ridge=ridge, clip=clip)
    return weights
# Estimate per-regime weights once from the full-sample labels (non-causal by design).
regime_w = regime_weights_from_labels(R_aligned, labels, RIDGE, CLIP)
# ---------------- Portfolio from fused probabilities ---------------
W_fused = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for dt in R_aligned.index:
    p_row = fused_prob.loc[dt]
    if p_row.isna().any():
        W_fused.loc[dt] = np.ones(N)/N  # warm-up days: fall back to equal weight
        continue
    # Probability-weighted blend of the per-regime MV weight vectors.
    w = np.zeros(N)
    for k in range(K):
        w += float(p_row[f"p{k}"]) * regime_w.get(k, np.ones(N)/N)
    w = np.clip(w, -CLIP, CLIP)
    w = w/np.abs(w).sum() if np.abs(w).sum()>0 else np.ones(N)/N  # L1-renormalize
    W_fused.loc[dt] = w
# Trade on yesterday's weights (shift(1)) to avoid look-ahead in execution.
ret_fused = (R_aligned * W_fused.shift(1)).sum(axis=1).fillna(0.0)
turn_fused = turnover_series(W_fused)
# ---------------- Baselines ----------------
# Equal-Weight: fixed 1/N, no rebalancing drift modelled.
w_ew = np.ones(N)/N
W_ew = pd.DataFrame([w_ew]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
ret_ew = (R_aligned @ pd.Series(w_ew, index=R_aligned.columns)).fillna(0.0)
turn_ew = turnover_series(W_ew)
# Static MV — FIXED (train once on initial window; held constant, no look-ahead refresh)
train_n = min(STATIC_TRAIN_WIN, len(R_aligned))
Sigma_train = R_aligned.iloc[:train_n].cov().values
mu_train = R_aligned.iloc[:train_n].mean().values
w_static = mean_var_weights(mu_train, Sigma_train, ridge=RIDGE, clip=CLIP)
W_static = pd.DataFrame([w_static]*len(R_aligned), index=R_aligned.index, columns=R_aligned.columns)
ret_static = (R_aligned @ pd.Series(w_static, index=R_aligned.columns)).fillna(0.0)
turn_static = turnover_series(W_static)
# Constant-Vol Risk-Parity (IVP), rolling: rebalance every IVP_REBAL days on an IVP_WIN lookback.
W_ivp = pd.DataFrame(index=R_aligned.index, columns=R_aligned.columns, dtype=float)
for i, dt in enumerate(R_aligned.index):
    if i < IVP_WIN:
        W_ivp.iloc[i] = np.ones(N)/N  # warm-up: equal weight until the lookback fills
    elif i % IVP_REBAL == 0 or i == IVP_WIN:
        Rw = R_aligned.iloc[i-IVP_WIN:i]  # strictly past window (no look-ahead)
        W_ivp.iloc[i] = inverse_vol_weights(Rw)
    else:
        W_ivp.iloc[i] = W_ivp.iloc[i-1]  # hold weights between rebalances
# Fix: .ffill() replaces the deprecated fillna(method="ffill") (removed in pandas 3.x).
W_ivp = W_ivp.ffill().fillna(1.0/N)
ret_ivp = (R_aligned * W_ivp.shift(1)).sum(axis=1).fillna(0.0)
turn_ivp = turnover_series(W_ivp)
# ---------------- KPIs (native, pre-cost) ----------------
def krow(name, rets):
    # Flatten the KPI dict into a table row for the summary DataFrame.
    k=ann_kpis_from_returns(rets)
    return [name, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']]
kpi_native = pd.DataFrame(
    [krow("Equal-Weight", ret_ew),
     krow("Static MV", ret_static),
     krow("IVP (Const-Vol RP, rolling)", ret_ivp),
     krow("IA+HMM (fused)", ret_fused)],
    columns=["Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"]
)
kpi_native.to_csv(os.path.join(OUT_DIR,"t8_kpi_native.csv"), index=False)
print("\n=== Test 8 KPIs (native, pre-cost) ===")
print(kpi_native.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
# ---------------- Risk-matched @ 10% (with static cap/floor) ---------------
def matched_series_and_alpha(name, rets, turn, *, alpha_max=np.inf, vol_floor=0.0):
    # Scale returns to TARGET_VOL; turnover scales with the same leverage alpha.
    rm, alpha = risk_match(rets, TARGET_VOL, alpha_max=alpha_max, vol_floor=vol_floor)
    return name, rm, turn*alpha, alpha, ann_kpis_from_returns(rm)
matched = {}
turn_m = {}
alphas = {}
rows=[]
for name, rets, turn in [
    ("Equal-Weight", ret_ew, turn_ew),
    ("Static MV", ret_static, turn_static),
    ("IVP (Const-Vol RP, rolling)", ret_ivp, turn_ivp),
    ("IA+HMM (fused)", ret_fused, turn_fused),
]:
    if name == "Static MV":
        # Static MV gets a leverage cap and vol floor to avoid pathological scaling.
        nm, rm, tm, a, k = matched_series_and_alpha(name, rets, turn,
                                                    alpha_max=ALPHA_CAP_STATIC,
                                                    vol_floor=VOL_FLOOR_STATIC)
    else:
        nm, rm, tm, a, k = matched_series_and_alpha(name, rets, turn)
    matched[nm]=rm; turn_m[nm]=tm; alphas[nm]=a
    rows.append([nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95'], a])
kpi_matched = pd.DataFrame(rows, columns=["Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day","Alpha(scale)"])
kpi_matched.to_csv(os.path.join(OUT_DIR,"t8_kpi_matchedVol.csv"), index=False)
print("\n=== Test 8 KPIs (risk-matched @ {:.0f}%) ===".format(TARGET_VOL*100))
print(kpi_matched.to_string(index=False, float_format=lambda x: f"{x:,.4f}"))
# ---------------- Net-of-costs grid ---------------------------------------
def cost_grid(cost_list):
    """KPIs of each risk-matched strategy net of linear trading costs, for
    every cost level (bps) in *cost_list*."""
    out = []
    for c in cost_list:
        for nm in matched:
            net = apply_costs(matched[nm], turn_m[nm], c)
            k = ann_kpis_from_returns(net)
            out.append([c, nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']])
    return pd.DataFrame(out, columns=["Cost_bps","Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"])
kpi_costs = cost_grid(COST_BPS_GRID)
grid_path = os.path.join(OUT_DIR, "t8_kpi_matchedVol_costGrid.csv")
kpi_costs.to_csv(grid_path, index=False)
# Fix: report the real destination (OUT_DIR), not the hard-coded "./out/" path.
print(f"\nSaved net-of-costs grid → {grid_path}")
# ---------------- Turnover stats & SAFE density plot -----------------------
turn_stats=[]
for nm, ts in turn_m.items():
    # Summary of one-way turnover per strategy (risk-matched scaling included).
    turn_stats.append([nm, ts.mean(), ts.median(), ts.quantile(0.9), ts.max()])
turn_df = pd.DataFrame(turn_stats, columns=["Strategy","Turnover_mean","Turnover_median","Turnover_p90","Turnover_max"])
turn_df.to_csv(os.path.join(OUT_DIR,"t8_turnover_stats.csv"), index=False)
def plot_turnover_distribution(turn_map, out_path):
    """SAFE turnover density plot: one histogram trace per strategy, or a
    dashed vertical spike for (near-)degenerate distributions. No SciPy KDE."""
    fig, ax = plt.subplots(figsize=(10,4))
    for name, ts in turn_map.items():
        # replace(±inf → NaN).dropna() already leaves only finite values,
        # so no second finite-filter is needed.
        s = pd.Series(ts).replace([np.inf, -np.inf], np.nan).dropna()
        if (len(s) == 0) or (s.std() <= 1e-12) or (s.nunique() < 3):
            # Degenerate distribution: draw a spike at its (single) value.
            val = float(s.iloc[0]) if len(s) else 0.0
            ax.axvline(val, linestyle="--", linewidth=1.5, label=f"{name} (spike)")
        else:
            counts, bins = np.histogram(s, bins=40, density=True)
            centers = 0.5*(bins[1:] + bins[:-1])
            ax.plot(centers, counts, linewidth=1.5, label=name)
    ax.set_title("Turnover distribution (one-way, risk-matched)")
    ax.grid(True); ax.legend()
    fig.tight_layout(); fig.savefig(out_path, dpi=180)
    plt.close(fig)  # fix: release the figure instead of leaving it open
plot_turnover_distribution(turn_m, os.path.join(OUT_DIR,"t8_fig_turnover_density.png"))
# ---------------- Subperiod KPIs (risk-matched) ----------------------------
def subperiod_slices(idx):
    """Boolean masks over *idx* for three eras: 2000s, 2010s, 2020→last date.

    Returns a list of (start_str, end_str, mask) tuples, skipping empty eras.
    """
    eras = [
        ("2000-01-01", "2009-12-31"),
        ("2010-01-01", "2019-12-31"),
        ("2020-01-01", str(idx.max().date())),
    ]
    slices = []
    for start, end in eras:
        mask = (idx >= pd.Timestamp(start)) & (idx <= pd.Timestamp(end))
        if mask.any():
            slices.append((start, end, mask))
    return slices
subs=[]
for (s,e,mask) in subperiod_slices(R_aligned.index):
    for nm in matched:
        # KPIs of each risk-matched series restricted to the era's dates.
        k=ann_kpis_from_returns(matched[nm].loc[mask])
        subs.append([f"{s}–{e}", nm, k['ret'], k['vol'], k['sharpe'], k['maxdd'], k['es95']])
sub_df=pd.DataFrame(subs, columns=["Period","Strategy","Ann.Return","Ann.Vol","Sharpe","MaxDD","ES95_day"])
sub_df.to_csv(os.path.join(OUT_DIR,"t8_kpi_matchedVol_subperiods.csv"), index=False)
# ---------------- HMM calibration (one-step-ahead, fused) ------------------
probs = fused_prob.copy().dropna()
# Targets: the next day's realised regime label (one-step-ahead evaluation).
y_true = labels.reindex(probs.index).shift(-1).dropna()
probs = probs.loc[y_true.index]
Y = np.zeros((len(y_true), K))
for i, y in enumerate(y_true.values.astype(int)):
    if 0 <= y < K:
        Y[i, y] = 1.0
P = probs[[f"p{k}" for k in range(K)]].values
# Multiclass Brier: mean squared distance between prob vector and one-hot target.
brier = np.mean(np.sum((P - Y)**2, axis=1))
conf = P.max(axis=1); pred = P.argmax(axis=1); acc = (pred==y_true.values.astype(int)).astype(float)
# Expected Calibration Error over 10 confidence bins.
bins = np.linspace(0,1,11); ece=0.0
for j, (b0, b1) in enumerate(zip(bins[:-1], bins[1:])):
    # Fix: close the last bin on the right so conf == 1.0 is not silently dropped.
    idx = (conf>=b0) & ((conf<=b1) if j == len(bins)-2 else (conf<b1))
    if idx.any():
        ece += np.mean(idx) * abs(acc[idx].mean() - conf[idx].mean())
pd.DataFrame({"Metric":["Brier","ECE_top10"], "Value":[brier, ece]}).to_csv(os.path.join(OUT_DIR,"t8_hmm_calibration.csv"), index=False)
# ---------------- Figures ---------------------------------------------------
# Risk-matched log cumulative wealth (pre-cost)
plt.figure(figsize=(10,4))
for nm, sr in matched.items():
curve=(1+sr).cumprod()
plt.plot(curve.index, np.log(curve), label=nm)
plt.title("Log Cumulative Wealth — Risk-matched (10% vol)") # Static may be <10% if capped
plt.legend(); plt.grid(True, alpha=.3); plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR,"t8_fig_cum_wealth_matched.png"), dpi=180)
# Risk-matched drawdowns
def ddplot_from_rets(name, rets):
    """Save a drawdown chart for one risk-matched strategy's daily returns.

    *name* labels the title and is sanitized into the output filename;
    *rets* is the daily return series passed to smooth_drawdown_from_returns.
    """
    curve, dd = smooth_drawdown_from_returns(rets)
    plt.figure(figsize=(10, 3))
    plt.plot(dd)
    plt.title(f"Drawdown (risk-matched) — {name}")
    plt.grid(True, alpha=.3)
    plt.tight_layout()
    fname = f"t8_fig_drawdown_matched_{name.replace(' ','_').replace('-','')}.png"
    plt.savefig(os.path.join(OUT_DIR, fname), dpi=180)
    # FIX: close after saving — this runs once per strategy and the original
    # leaked one open figure per call.
    plt.close()

for nm, sr in matched.items():
    ddplot_from_rets(nm, sr)
# Native wealth (pre-cost): cumulative product of (1 + daily return).
curve_fused = (1 + ret_fused).cumprod()
curve_static = (1 + ret_static).cumprod()
curve_ivp = (1 + ret_ivp).cumprod()
curve_ew = (1 + ret_ew).cumprod()
plt.figure(figsize=(10, 4))
for crv, lbl in [(curve_fused, "IA+HMM (fused)"),
                 (curve_static, "Static MV"),
                 (curve_ivp, "IVP (rolling)"),
                 (curve_ew, "Equal-Weight")]:
    plt.plot(np.log(crv), label=lbl)
plt.title("Log Cumulative Wealth — Test 8 (native)")
plt.legend()
plt.grid(True, alpha=.3)
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "t8_cum_wealth_native.png"), dpi=180)
# Probability ribbon (fused): stacked one-step-ahead regime probabilities.
cols = [f"p{k}" for k in range(K)]
pdf = fused_prob[cols].dropna(how="any")
plt.figure(figsize=(12, 3.6))
plt.stackplot(pdf.index, pdf.values.T, labels=cols, alpha=0.9)
plt.ylim(0, 1)
plt.title("One-step-ahead Probabilities — IA+HMM (fused)")
plt.legend(ncol=K, fontsize=8, frameon=False)
plt.grid(True, alpha=.2)
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "t8_prob_ribbon.png"), dpi=180)
# Weights timeline (fused): stacked per-asset portfolio weights over time.
plt.figure(figsize=(11, 4))
# FIX: fillna(method="ffill") is deprecated and removed in recent pandas;
# DataFrame.ffill() is the supported equivalent (same forward-fill result).
plt.stackplot(W_fused.index, W_fused.ffill().values.T, labels=tickers)
plt.title("Portfolio Weights — IA+HMM (fused)")
plt.legend(loc="upper left", ncol=min(N, 4), fontsize=8)
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "t8_weights_fused.png"), dpi=180)
print(f"\nFigures & tables saved in: {os.path.abspath(OUT_DIR)}")
Model is not converging. Current: 389.87283970835267 is not greater than 389.91854359257286. Delta is -0.04570388422018823
Loaded 13 assets; feature rows=3362; features=['x_ewvol', 'x_pc1']
Model is not converging. Current: 395.60349779898564 is not greater than 395.6122636760006. Delta is -0.008765877014980106 Model is not converging. Current: 404.2599203217861 is not greater than 404.26936548010417. Delta is -0.009445158318044378 Model is not converging. Current: 420.36977301248635 is not greater than 420.38081938787917. Delta is -0.011046375392822938 Model is not converging. Current: 532.7838403663428 is not greater than 532.8045589952701. Delta is -0.020718628927284044 Model is not converging. Current: 523.2122200256835 is not greater than 523.2601395497974. Delta is -0.04791952411392231 Model is not converging. Current: 709.8789261884571 is not greater than 709.8798558887838. Delta is -0.0009297003267647597 Model is not converging. Current: 692.1376087245156 is not greater than 692.1396690939667. Delta is -0.0020603694511009962 Model is not converging. Current: 677.1086596972053 is not greater than 677.1110946649715. Delta is -0.0024349677661348323 Model is not converging. Current: 497.3941108311458 is not greater than 497.3941870640186. Delta is -7.623287280011937e-05 Model is not converging. Current: 647.2603760262214 is not greater than 647.2612236604854. Delta is -0.0008476342640051371 Model is not converging. Current: 638.4778442645846 is not greater than 638.4848189671235. Delta is -0.0069747025388551265 Model is not converging. Current: 497.62463232852065 is not greater than 497.62496636703685. Delta is -0.00033403851620050773 Model is not converging. Current: 468.37887909742966 is not greater than 468.3835790480223. Delta is -0.004699950592623736 Model is not converging. Current: 473.0591907627078 is not greater than 473.0661737642574. Delta is -0.006983001549656365 Model is not converging. Current: 560.0942112781146 is not greater than 560.0944139239596. Delta is -0.000202645844979088 Model is not converging. Current: 561.9855916122344 is not greater than 561.9859251994662. Delta is -0.0003335872318075417 Model is not converging. 
Current: 560.9462865959019 is not greater than 560.9467565087089. Delta is -0.00046991280692054715 Model is not converging. Current: 569.9735594618257 is not greater than 569.9888135330621. Delta is -0.015254071236313393 Model is not converging. Current: 513.9888519080189 is not greater than 514.017249539479. Delta is -0.02839763146016594 Model is not converging. Current: 505.9065507840104 is not greater than 505.9325281762053. Delta is -0.02597739219493178 Model is not converging. Current: 496.8747461679629 is not greater than 496.8940313340508. Delta is -0.01928516608791142 Model is not converging. Current: 493.10471063548647 is not greater than 493.1100064974062. Delta is -0.005295861919705658 Model is not converging. Current: 453.5731327851453 is not greater than 453.5732345809892. Delta is -0.00010179584393199548 Model is not converging. Current: 444.7933227674599 is not greater than 444.7935453070773. Delta is -0.00022253961736851124 Model is not converging. Current: 398.01333655053435 is not greater than 398.0281765801007. Delta is -0.01484002956635777 Model is not converging. Current: 390.7892675515517 is not greater than 390.80067757864106. Delta is -0.011410027089368668 Model is not converging. Current: 406.3594393384705 is not greater than 406.38140047739375. Delta is -0.02196113892324547 Model is not converging. Current: 408.57293892512826 is not greater than 408.59163249167705. Delta is -0.01869356654879084 Model is not converging. Current: 422.517630334027 is not greater than 422.5401400854047. Delta is -0.022509751377697285 Model is not converging. Current: 427.84845482848306 is not greater than 427.87875449935547. Delta is -0.03029967087240948 Model is not converging. Current: 432.9376840215134 is not greater than 432.93894313415836. Delta is -0.0012591126449592593 Model is not converging. Current: 429.7194932000672 is not greater than 429.7201140128609. Delta is -0.0006208127937270547 Model is not converging. 
Current: 444.63636410513374 is not greater than 444.67077087383865. Delta is -0.03440676870491188 Model is not converging. Current: 458.890811536571 is not greater than 458.9368395188587. Delta is -0.04602798228768279 Model is not converging. Current: 501.7909691715577 is not greater than 501.80617906783107. Delta is -0.015209896273347567 Model is not converging. Current: 516.968558607946 is not greater than 516.9793435893532. Delta is -0.010784981407255145 Model is not converging. Current: 566.9588058217353 is not greater than 566.959076575291. Delta is -0.0002707535556965013 Model is not converging. Current: 576.9378295435155 is not greater than 576.9395475683427. Delta is -0.001718024827255249 Model is not converging. Current: 563.3181894420273 is not greater than 563.3188705877014. Delta is -0.0006811456740933863 Model is not converging. Current: 566.5981866707649 is not greater than 566.5982157718776. Delta is -2.910111265919113e-05 Model is not converging. Current: 559.3862085574262 is not greater than 559.3910065990974. Delta is -0.004798041671165265 Model is not converging. Current: 565.6964827430708 is not greater than 565.6999572398837. Delta is -0.003474496812941652 Model is not converging. Current: 562.0576815375418 is not greater than 562.0609985615853. Delta is -0.003317024043440142 Model is not converging. Current: 571.18586272219 is not greater than 571.1881566951915. Delta is -0.0022939730015423265 Model is not converging. Current: 559.1272190083262 is not greater than 559.1291104979588. Delta is -0.0018914896326123198 Model is not converging. Current: 602.2516234704003 is not greater than 602.2524558736724. Delta is -0.000832403272056581 Model is not converging. Current: 623.3517198848233 is not greater than 623.3529717342053. Delta is -0.0012518493820152798 Model is not converging. Current: 643.2416420327299 is not greater than 643.2429544022963. Delta is -0.001312369566335292 Model is not converging. 
Current: 645.9588758596371 is not greater than 645.9594446361631. Delta is -0.0005687765259381194 Model is not converging. Current: 641.725184585292 is not greater than 641.7292498974475. Delta is -0.004065312155489664 Model is not converging. Current: 640.6099708196236 is not greater than 640.6100062032195. Delta is -3.5383595900384535e-05 Model is not converging. Current: 641.1182704187269 is not greater than 641.121223220791. Delta is -0.002952802064100979 Model is not converging. Current: 642.4738808363991 is not greater than 642.4779252570721. Delta is -0.004044420673039895 Model is not converging. Current: 646.2972767632926 is not greater than 646.3010637404075. Delta is -0.003786977114828005 Model is not converging. Current: 648.2628522276609 is not greater than 648.2657217857371. Delta is -0.002869558076213252 Model is not converging. Current: 781.17901881952 is not greater than 781.1807263215957. Delta is -0.0017075020756465165 Model is not converging. Current: 844.0410423180136 is not greater than 844.0419943069044. Delta is -0.0009519888907334462 Model is not converging. Current: 821.6122452382398 is not greater than 821.6142392136907. Delta is -0.0019939754508868646 Model is not converging. Current: 608.4144776748001 is not greater than 608.4171884011388. Delta is -0.0027107263387051717 Model is not converging. Current: 615.5738343544865 is not greater than 615.5767204248887. Delta is -0.002886070402155383 Model is not converging. Current: 691.0862571102078 is not greater than 691.0904891170456. Delta is -0.0042320068378103315 Model is not converging. Current: 691.884083607118 is not greater than 691.8883400837645. Delta is -0.004256476646560259 Model is not converging. Current: 691.5933614977193 is not greater than 691.5981597205036. Delta is -0.004798222784302197 Model is not converging. Current: 636.1451718217206 is not greater than 636.1457524068984. Delta is -0.0005805851777722637 Model is not converging. 
Current: 646.7160351641934 is not greater than 647.1561691657477. Delta is -0.44013400155427007 Model is not converging. Current: 661.8167191709838 is not greater than 662.0372497037188. Delta is -0.22053053273498335 Model is not converging. Current: 667.1582536874924 is not greater than 667.4221345743808. Delta is -0.26388088688838707 Model is not converging. Current: 672.7331673794818 is not greater than 673.0540673213847. Delta is -0.32089994190289417 Model is not converging. Current: 666.7998120075691 is not greater than 667.0102819612648. Delta is -0.21046995369567867 Model is not converging. Current: 666.3059014283275 is not greater than 666.4048955082361. Delta is -0.09899407990860709 Model is not converging. Current: 613.440410328221 is not greater than 613.5257559376993. Delta is -0.0853456094782814 Model is not converging. Current: 606.1417442396211 is not greater than 606.1951246358524. Delta is -0.05338039623131863 Model is not converging. Current: 600.7891245374491 is not greater than 600.8618575142059. Delta is -0.07273297675681079 Model is not converging. Current: 590.0162507323257 is not greater than 590.1353875098237. Delta is -0.1191367774979426 Model is not converging. Current: 606.3707454253064 is not greater than 606.4580413214413. Delta is -0.08729589613494682 Model is not converging. Current: 605.4732142568412 is not greater than 605.5274956160399. Delta is -0.054281359198739665 Model is not converging. Current: 605.5176509042411 is not greater than 605.525586811296. Delta is -0.007935907054843483 Model is not converging. Current: 606.2565081533663 is not greater than 606.2848715353983. Delta is -0.02836338203201194 Model is not converging. Current: 606.7509265233965 is not greater than 606.7553221946939. Delta is -0.004395671297402259 Model is not converging. Current: 558.8773167497272 is not greater than 558.8813221806857. Delta is -0.004005430958500256 Model is not converging. 
Current: 597.0666982370659 is not greater than 597.0797391331097. Delta is -0.013040896043776229 Model is not converging. Current: 554.752573775024 is not greater than 554.7565105457054. Delta is -0.003936770681434609 Model is not converging. Current: 553.1359039678034 is not greater than 553.1395933998209. Delta is -0.0036894320174951645 Model is not converging. Current: 551.4789157504738 is not greater than 551.5515755562857. Delta is -0.07265980581189524 Model is not converging. Current: 663.3510832251052 is not greater than 663.3841216375271. Delta is -0.0330384124218881 Model is not converging. Current: 656.1958205050584 is not greater than 656.21101485804. Delta is -0.01519435298166627 Model is not converging. Current: 620.2789008812879 is not greater than 620.2820550931145. Delta is -0.0031542118266543184 Model is not converging. Current: 622.9654494902916 is not greater than 622.9906149110335. Delta is -0.025165420741927846 Model is not converging. Current: 664.317944780949 is not greater than 664.3181075029149. Delta is -0.0001627219659212642 Model is not converging. Current: 687.4575429999189 is not greater than 687.4623321099035. Delta is -0.004789109984585593 Model is not converging. Current: 693.5398754709931 is not greater than 693.5629393372019. Delta is -0.023063866208758554 Model is not converging. Current: 685.5607089768239 is not greater than 685.6270437672385. Delta is -0.06633479041465762 Model is not converging. Current: 685.6979303426833 is not greater than 685.7729752831822. Delta is -0.0750449404988558 Model is not converging. Current: 685.307338792074 is not greater than 685.3878031435186. Delta is -0.08046435144456154 Model is not converging. Current: 684.7568930217823 is not greater than 684.8396052391329. Delta is -0.08271221735060408 Model is not converging. Current: 691.64895167386 is not greater than 691.6602250415447. Delta is -0.011273367684680125 Model is not converging. 
Current: 686.175677832811 is not greater than 686.2529891027104. Delta is -0.07731126989949644 Model is not converging. Current: 683.7305017671897 is not greater than 683.7956175277266. Delta is -0.0651157605368553 Model is not converging. Current: 657.1410721571496 is not greater than 657.1528432752615. Delta is -0.01177111811193754 Model is not converging. Current: 626.5788044697925 is not greater than 626.5833217805884. Delta is -0.00451731079590445 Model is not converging. Current: 609.9742723842236 is not greater than 609.9932929883287. Delta is -0.019020604105094208 Model is not converging. Current: 610.8768527483965 is not greater than 610.8803358151295. Delta is -0.003483066732997031 Model is not converging. Current: 607.7430210659807 is not greater than 607.758386675833. Delta is -0.015365609852324269 Model is not converging. Current: 605.2001156266639 is not greater than 605.2177796053794. Delta is -0.01766397871551817 Model is not converging. Current: 632.2520822580158 is not greater than 632.3096044689308. Delta is -0.05752221091506726 Model is not converging. Current: 624.7246277998114 is not greater than 624.8197662184324. Delta is -0.0951384186209907 Model is not converging. Current: 633.8707062631968 is not greater than 633.8725145960686. Delta is -0.0018083328718603298 Model is not converging. Current: 691.1557900097373 is not greater than 691.1605245976957. Delta is -0.004734587958409975 Model is not converging. Current: 696.3569925712039 is not greater than 696.3576699852701. Delta is -0.0006774140662173522 Model is not converging. Current: 735.9966012205355 is not greater than 736.1497463670136. Delta is -0.15314514647809574 Model is not converging. Current: 741.4090534127449 is not greater than 741.5674348477294. Delta is -0.1583814349844488 Model is not converging. Current: 806.2690004214189 is not greater than 806.2716947495585. Delta is -0.0026943281395688246 Model is not converging. 
Current: 870.4417880625542 is not greater than 870.4421267040966. Delta is -0.0003386415423847211 Model is not converging. Current: 874.2910441909661 is not greater than 874.2911536886778. Delta is -0.00010949771171908651 Model is not converging. Current: 902.5394478540348 is not greater than 902.5482488208984. Delta is -0.008800966863532267 Model is not converging. Current: 907.9117738031123 is not greater than 907.9208997807042. Delta is -0.00912597759190703 Model is not converging. Current: 917.1479211180495 is not greater than 917.1547300293647. Delta is -0.006808911315260957 Model is not converging. Current: 920.6646704957557 is not greater than 920.6710136945203. Delta is -0.006343198764625413 Model is not converging. Current: 921.6073421545094 is not greater than 921.6138747908119. Delta is -0.006532636302495121 Model is not converging. Current: 923.3575380980798 is not greater than 923.3601991978848. Delta is -0.002661099804981859 Model is not converging. Current: 944.8415612281647 is not greater than 944.8418222631809. Delta is -0.00026103501625129866 Model is not converging. Current: 928.4859346726997 is not greater than 928.4984065529873. Delta is -0.012471880287534987 Model is not converging. Current: 927.6347572647405 is not greater than 927.6385253390098. Delta is -0.003768074269260069 Model is not converging. Current: 935.9690543917699 is not greater than 935.9699588287401. Delta is -0.0009044369702451149 Model is not converging. Current: 944.7283235016441 is not greater than 944.7392140943513. Delta is -0.010890592707141877 Model is not converging. Current: 953.3599412670986 is not greater than 953.4700051737983. Delta is -0.11006390669967914 Model is not converging. Current: 967.4285177642461 is not greater than 967.5282016215652. Delta is -0.09968385731917806 Model is not converging. Current: 970.1763393033673 is not greater than 970.212708906224. Delta is -0.03636960285666646 Model is not converging. 
Current: 954.5609003640387 is not greater than 954.5762753662549. Delta is -0.015375002216160283 Model is not converging. Current: 1025.9247075782428 is not greater than 1026.023750816467. Delta is -0.0990432382243398 Model is not converging. Current: 1055.636349711419 is not greater than 1055.73155148508. Delta is -0.09520177366107418 Model is not converging. Current: 1047.1504434056817 is not greater than 1047.1900408399013. Delta is -0.039597434219558636 Model is not converging. Current: 1068.7605154622584 is not greater than 1068.901361767094. Delta is -0.14084630483557703 Model is not converging. Current: 1070.4772474350045 is not greater than 1070.6538082224338. Delta is -0.17656078742925274 Model is not converging. Current: 1063.5499616253587 is not greater than 1063.6107377776837. Delta is -0.06077615232493372 Model is not converging. Current: 1100.2480118703263 is not greater than 1100.2560587025926. Delta is -0.008046832266245474 Model is not converging. Current: 1143.2728553065215 is not greater than 1143.3715056965918. Delta is -0.09865039007036103 Model is not converging. Current: 1141.1088328479393 is not greater than 1141.199624631882. Delta is -0.09079178394267728 Model is not converging. Current: 1136.4830069710097 is not greater than 1136.5606864059755. Delta is -0.0776794349658303 Model is not converging. Current: 1132.6185676430289 is not greater than 1132.682428731013. Delta is -0.0638610879841508 Model is not converging. Current: 1128.5292534945431 is not greater than 1128.5758735179184. Delta is -0.04662002337522608 Model is not converging. Current: 1127.056686845815 is not greater than 1127.1168752200665. Delta is -0.060188374251538335 Model is not converging. Current: 1122.2897396306244 is not greater than 1122.3147846895279. Delta is -0.025045058903515383 Model is not converging. Current: 1142.1820087454985 is not greater than 1142.1862650192131. Delta is -0.004256273714645431 Model is not converging. 
Current: 1099.0578586872014 is not greater than 1099.0620589242747. Delta is -0.004200237073291646 Model is not converging. Current: 1135.6240678363877 is not greater than 1135.6240975103137. Delta is -2.9673926064788247e-05 Model is not converging. Current: 1064.4736230892174 is not greater than 1064.4879871150395. Delta is -0.014364025822033 Model is not converging. Current: 1005.2489913743328 is not greater than 1005.2498028132604. Delta is -0.0008114389275988287 Model is not converging. Current: 1042.2737712841042 is not greater than 1042.2785253362642. Delta is -0.004754052160024003 Model is not converging. Current: 1037.0706277696772 is not greater than 1037.0768267434203. Delta is -0.00619897374303946 Model is not converging. Current: 1034.8494148701352 is not greater than 1034.853882810512. Delta is -0.004467940376798651 Model is not converging. Current: 816.1427352392408 is not greater than 816.1470397044515. Delta is -0.004304465210680064 Model is not converging. Current: 736.8435962831383 is not greater than 736.867376018414. Delta is -0.0237797352757525 Model is not converging. Current: 736.2549722902698 is not greater than 736.2701635859863. Delta is -0.015191295716476816 Model is not converging. Current: 733.0088374746977 is not greater than 733.0445230081223. Delta is -0.035685533424612004 Model is not converging. Current: 730.9667184427168 is not greater than 730.9965400264886. Delta is -0.02982158377176347 Model is not converging. Current: 732.2452023818581 is not greater than 732.2527216315566. Delta is -0.007519249698475505 Model is not converging. Current: 733.5755413108765 is not greater than 733.5862220206391. Delta is -0.010680709762596052 Model is not converging. Current: 734.2816026619587 is not greater than 734.2929153267506. Delta is -0.011312664791944371 Model is not converging. Current: 732.6312220118732 is not greater than 732.6621145275719. Delta is -0.03089251569872431 Model is not converging. 
Current: 717.5379288239386 is not greater than 717.5592496664099. Delta is -0.021320842471254764 Model is not converging. Current: 710.2769744572315 is not greater than 710.3001240347056. Delta is -0.023149577474100624 Model is not converging. Current: 704.2489738721843 is not greater than 704.2684253688623. Delta is -0.019451496678016156 Model is not converging. Current: 699.0026015397397 is not greater than 699.0122454075054. Delta is -0.009643867765703362 Model is not converging. Current: 694.0550121024608 is not greater than 694.0671688671828. Delta is -0.012156764722021762 Model is not converging. Current: 690.2343320702761 is not greater than 690.2417032675322. Delta is -0.00737119725613411 Model is not converging. Current: 728.8780970422679 is not greater than 728.8802112676173. Delta is -0.0021142253493735552 Model is not converging. Current: 726.6178356581345 is not greater than 726.6183299051031. Delta is -0.0004942469686284312 Model is not converging. Current: 724.6625458449145 is not greater than 724.6626724694527. Delta is -0.00012662453821121744 Model is not converging. Current: 692.7143479235503 is not greater than 692.7326337464873. Delta is -0.018285822936945806
=== Test 8 KPIs (native, pre-cost) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day
Equal-Weight 0.0602 0.0728 0.8267 -0.2053 0.0110
Static MV 0.0197 0.0334 0.5894 -0.0964 0.0050
IVP (Const-Vol RP, rolling) 0.0199 0.0118 1.6913 -0.0216 0.0016
IA+HMM (fused) 0.0282 0.0136 2.0786 -0.0271 0.0017
=== Test 8 KPIs (risk-matched @ 10%) ===
Strategy Ann.Return Ann.Vol Sharpe MaxDD ES95_day Alpha(scale)
Equal-Weight 0.0821 0.1000 0.8211 -0.2728 0.0151 1.3737
Static MV 0.0566 0.1000 0.5657 -0.2683 0.0151 2.9969
IVP (Const-Vol RP, rolling) 0.1771 0.1000 1.7707 -0.1717 0.0136 8.4931
IA+HMM (fused) 0.2222 0.1000 2.2215 -0.1869 0.0129 7.3772
Saved net-of-costs grid → ./out/t8_kpi_matchedVol_costGrid.csv
Figures & tables saved in: /Users/jeremy.duriez/Desktop/recherche2/out4